library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
Here I obtain various demographic data, including income (percent below 50% and 80% of area median income), vehicle ownership, age, English language ability, and occupants per room.
# obtain the saved census data
setwd("~/Documents/2020 Spring Quarter/CEE 218Z")
acs_vars = readRDS("censusData2018_acs_acs5.rds")
setwd("~/Documents/2020 Spring Quarter/CEE 218Z/covid19")
# load in income data - code adapted from other students
sj_median_income_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "B19013_001E"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
rename(
Median_Income = B19013_001E
) %>%
filter(!is.na(Median_Income)) %>%
left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>% #this code gives each blockgroup a district designation
filter(
!is.na(DISTRICTS)
) %>%
# this code joins our census data with the social distancing data, processed as shown below
left_join(sj_socialdistancing %>%
filter(weekend == F) %>%
filter(date > shelter_start) %>%
group_by(origin_census_block_group) %>%
summarize(
completely_home_device_count = sum(completely_home_device_count),
device_count = sum(device_count)) %>%
mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1),
`% not completely at home` = (100 - `% Completely at Home`)),
by = c("blockgroup" = "origin_census_block_group")
) %>%
filter(
!is.na(device_count)
) %>%
left_join(sj_pre_sd_at_home_average %>% dplyr::select(origin_census_block_group, `% Completely at Home Pre Shelter`, `% not completely at home pre shelter`), by = c("blockgroup" = "origin_census_block_group"))
sj_ami_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B19001)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
group_by(blockgroup) %>%
summarize(
Total = B19001_001E,
`Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
#sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
`Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E),
`Under 125,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E, B19001_014E)
) %>%
mutate(
`% under 75,000` = `Under 75,000` / Total * 100,
`% over 75,000` = (100 - `% under 75,000`),
`% under 100,000` = `Under 100,000` / Total * 100,
`% over 100,000` = (100 - `% under 100,000`),
`% under 125,000` = `Under 125,000` / Total * 100,
`% over 125,000` = (100 - `% under 125,000`),
) %>%
left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)
) %>%
filter(!is.na(device_count))
# loading in language data - code adapted from other students
sj_lang_by_block <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B16004)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
left_join(acs_vars, by = c("variable" = "name")) %>%
mutate(
tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
) %>%
filter(tier %in% c('Speak English "not well"',
'Speak English "not at all"',
'Total', 'Speak Spanish',
'Speak Asian and Pacific Island languages')) %>%
group_by(blockgroup, tier) %>%
summarise(
estimate1 = sum(estimate)
) %>%
spread(
key = "tier",
value = "estimate1"
) %>%
mutate(
`% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
`% speaking english > well` = (100 - `% speaking english < well`),
`% speaking spanish` = (`Speak Spanish`/ Total) * 100,
`% not speaking spanish` = (100 - `% speaking spanish`),
`% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100
) %>%
left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
filter(!is.na(device_count)) %>%
mutate(log_perc = log(`% speaking english < well`))
# loading in age data - specifically looking at percentage 65+ and percentage <30
sj_age_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B01001)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
mutate(
label = acs_vars$label[match(variable,acs_vars$name)]
) %>%
dplyr::select(-variable) %>%
separate(
label,
into = c(NA,NA,"sex","age"),
sep = "!!"
) %>% filter(!is.na(age)) %>%
mutate(elderly = ifelse(age %in% c("65 and 66 years", "67 to 69 years", "70 to 74 years", "75 to 79 years", "80 to 84 years", "85 years and over"), estimate, NA), `less than 30` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years", "18 and 19 years", "20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA), `less than 18` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years"), estimate, NA), `20-29` = ifelse(age %in% c("20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA)) %>%
group_by(blockgroup) %>%
summarize(elderly = sum(elderly, na.rm = T), `less than 30` = sum(`less than 30`, na.rm = T), total = sum(estimate, na.rm = T), `less than 18` = sum(`less than 18`, na.rm = T), `20-29` = sum(`20-29`, na.rm = T)) %>%
mutate(`percent elderly` = elderly*100 / total, `percent less than 30` = `less than 30`*100 / total, `percent nonelderly` = (100 - `percent elderly`), `percent less than 18` = `less than 18`*100/total, `percent 20-29` = `20-29`*100/total) %>%
left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
filter(!is.na(device_count))
# keep all age categories separated
sj_all_age_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B01001)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
mutate(
label = acs_vars$label[match(variable,acs_vars$name)]
) %>%
dplyr::select(-variable) %>%
separate(
label,
into = c(NA,NA,"sex","age"),
sep = "!!"
) %>% filter(!is.na(age)) %>%
group_by(blockgroup, age) %>%
summarize(total_by_age = sum(estimate)) %>%
spread(key = age, value = total_by_age) %>%
left_join(sj_age_by_block %>% dplyr::select(blockgroup, total)) %>%
left_join(sj_median_income_by_block %>% dplyr::select(device_count, blockgroup)) %>%
filter(!is.na(device_count)) %>%
dplyr::select(-device_count)
# get data on vehicles available as vehicles allocation
sj_vehicles_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B992512)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
dplyr::select(B992512_001E, blockgroup) %>%
rename(total_vehicles = B992512_001E, blockgroup = blockgroup) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
mutate(`vehicles per capita` = total_vehicles / total) %>%
filter(!is.na(device_count))
# also get data on vehicles available as households without a vehicle
sj_no_vehicles_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B25044)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, NA,"vehicles"), sep = "!!") %>%
filter(!is.na(vehicles)) %>%
group_by(blockgroup, vehicles) %>%
summarize(grouped_vehicles = sum(estimate)) %>%
spread(key = vehicles, value = grouped_vehicles) %>%
mutate(total_nums = `1 vehicle available` + `2 vehicles available` + `3 vehicles available` + `4 vehicles available` + `5 or more vehicles available` + `No vehicle available`, `percent no vehicles` = `No vehicle available`*100 / total_nums, `percent with vehicles` = (100-`percent no vehicles`)) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# get data on occupants per room
sj_occupants_per_room_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B25014)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>%
filter(!is.na(`occupants per room`)) %>%
group_by(blockgroup, `occupants per room`) %>%
summarize(estimate_tot = sum(estimate)) %>%
spread(key = `occupants per room`, value = estimate_tot) %>%
mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`, `percent 1 or more` = (`1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) * 100/ total_nums, `percent less than 1` = (100-`percent 1 or more`)) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
In the plots below, I show the selected variables against percent of devices completely at home since the shelter-in-place order started, as well as against percent of devices pre-shelter-in-place for comparison.
# age
sj_age_by_block %>%
ggplot(aes(
x = `percent less than 30`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 30",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Young Age Groups"
)
young_model <- lm(sj_age_by_block$`% not completely at home` ~ sj_age_by_block$`percent less than 30`)
summary(young_model)
##
## Call:
## lm(formula = sj_age_by_block$`% not completely at home` ~ sj_age_by_block$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.305 -4.595 -0.326 4.013 39.401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.89889 1.46159 30.719 < 2e-16 ***
## sj_age_by_block$`percent less than 30` 0.17542 0.03705 4.735 2.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.014 on 567 degrees of freedom
## Multiple R-squared: 0.03803, Adjusted R-squared: 0.03634
## F-statistic: 22.42 on 1 and 567 DF, p-value: 2.775e-06
sj_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
ggplot(aes(
x = `percent elderly`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents 65 and older",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Elderly Population"
)
elderly_model <- lm(`% not completely at home` ~ `percent elderly`, sj_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `percent elderly`,
## data = sj_age_by_block %>% filter(`percent elderly` < 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.329 -4.899 -0.267 4.127 34.323
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.82116 0.75636 71.158 < 2e-16 ***
## `percent elderly` -0.17173 0.05223 -3.288 0.00107 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.077 on 564 degrees of freedom
## Multiple R-squared: 0.01881, Adjusted R-squared: 0.01707
## F-statistic: 10.81 on 1 and 564 DF, p-value: 0.001071
sj_age_by_block %>%
ggplot(aes(
x = `percent less than 18`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents less than 18",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Child Population"
)
child_model <- lm(`% not completely at home` ~ `percent less than 18`, sj_age_by_block)
summary(child_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `percent less than 18`,
## data = sj_age_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.573 -5.033 -0.242 4.352 39.666
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.21192 1.18234 45.852 <2e-16 ***
## `percent less than 18` -0.11474 0.05038 -2.277 0.0231 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.134 on 567 degrees of freedom
## Multiple R-squared: 0.009064, Adjusted R-squared: 0.007316
## F-statistic: 5.186 on 1 and 567 DF, p-value: 0.02314
sj_age_by_block %>%
ggplot(aes(
x = `percent 20-29`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents ages 20-29",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Population Ages 20-29"
)
young_adult_model <- lm(`% not completely at home` ~ `percent 20-29`, sj_age_by_block)
summary(young_adult_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `percent 20-29`, data = sj_age_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.315 -4.621 -0.302 4.467 39.461
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.23891 0.65596 73.539 < 2e-16 ***
## `percent 20-29` 0.24490 0.04081 6.002 3.49e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.923 on 567 degrees of freedom
## Multiple R-squared: 0.05973, Adjusted R-squared: 0.05807
## F-statistic: 36.02 on 1 and 567 DF, p-value: 3.486e-09
# compare this to pre-shelter-in-place behavior
sj_age_by_block %>%
ggplot(aes(
x = `percent less than 30`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 30",
y = "Percent devices leaving home pre-shelter-in-place",
title = "San Jose: Staying at Home and Young Age Groups Pre Shelter-in-Place"
)
young_model2 <- lm(sj_age_by_block$`% not completely at home pre shelter` ~ sj_age_by_block$`percent less than 30`)
summary(young_model2)
##
## Call:
## lm(formula = sj_age_by_block$`% not completely at home pre shelter` ~
## sj_age_by_block$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.1939 -2.8160 -0.1557 2.9950 16.7071
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 81.87032 0.82253 99.53 < 2e-16 ***
## sj_age_by_block$`percent less than 30` -0.11072 0.02085 -5.31 1.57e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.51 on 567 degrees of freedom
## Multiple R-squared: 0.04738, Adjusted R-squared: 0.0457
## F-statistic: 28.2 on 1 and 567 DF, p-value: 1.573e-07
sj_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
ggplot(aes(
x = `percent elderly`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents 65 and older",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Staying at Home and Elderly Population Pre Shelter-in-Place"
)
elderly_model2 <- lm(`% not completely at home pre shelter` ~ `percent elderly`, sj_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent elderly`,
## data = sj_age_by_block %>% filter(`percent elderly` < 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.236 -2.830 -0.158 3.145 14.296
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 75.9045 0.4257 178.295 < 2e-16 ***
## `percent elderly` 0.1329 0.0294 4.522 7.47e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.546 on 564 degrees of freedom
## Multiple R-squared: 0.03499, Adjusted R-squared: 0.03328
## F-statistic: 20.45 on 1 and 564 DF, p-value: 7.466e-06
sj_age_by_block %>%
ggplot(aes(
x = `percent less than 18`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents less than 18",
y = "Percent devices leaving home on weekdays pre shelter-in-place",
title = "San Jose: Social Distancing and Child Population Pre Shelter"
)
child_model2 <- lm(`% not completely at home pre shelter` ~ `percent less than 18`, sj_age_by_block)
summary(child_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent less than 18`,
## data = sj_age_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.3901 -3.0050 0.0411 3.1602 12.4458
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.08044 0.66828 113.845 <2e-16 ***
## `percent less than 18` 0.06849 0.02848 2.405 0.0165 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.597 on 567 degrees of freedom
## Multiple R-squared: 0.0101, Adjusted R-squared: 0.008352
## F-statistic: 5.784 on 1 and 567 DF, p-value: 0.01649
sj_age_by_block %>%
ggplot(aes(
x = `percent 20-29`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents ages 20-29",
y = "Percent devices leaving home on weekdays pre shelter-in-place",
title = "San Jose: Social Distancing and Population Ages 20-29 Pre Shelter"
)
young_adult_model2 <- lm(`% not completely at home pre shelter` ~ `percent 20-29`, sj_age_by_block)
summary(young_adult_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent 20-29`,
## data = sj_age_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.0695 -2.7320 -0.1156 2.8283 15.9003
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 80.23612 0.36071 222.440 < 2e-16 ***
## `percent 20-29` -0.18877 0.02244 -8.413 3.26e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.357 on 567 degrees of freedom
## Multiple R-squared: 0.111, Adjusted R-squared: 0.1094
## F-statistic: 70.78 on 1 and 567 DF, p-value: 3.264e-16
# income - less than $75000
sj_ami_by_block %>%
ggplot(aes(
x = `% over 75,000`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Households Above 50% AMI"
)
income_75_model <- lm(`% not completely at home` ~ `% over 75,000`, sj_ami_by_block)
summary(income_75_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% over 75,000`, data = sj_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.068 -4.630 -0.633 4.135 32.635
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.27172 1.07390 59.85 <2e-16 ***
## `% over 75,000` -0.20381 0.01655 -12.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.169 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2113, Adjusted R-squared: 0.2099
## F-statistic: 151.6 on 1 and 566 DF, p-value: < 2.2e-16
# income - less than $100000
sj_ami_by_block %>%
ggplot(aes(
x = `% over 100,000`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Households Above 80% AMI"
)
income_100_model <- lm(`% not completely at home` ~ `% over 100,000`, sj_ami_by_block)
summary(income_100_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% over 100,000`, data = sj_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.5007 -4.7327 -0.3607 3.8991 31.0438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.80826 0.83877 73.69 <2e-16 ***
## `% over 100,000` -0.20048 0.01537 -13.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.078 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.231, Adjusted R-squared: 0.2297
## F-statistic: 170.1 on 1 and 566 DF, p-value: < 2.2e-16
# income - less than $125000
sj_ami_by_block %>%
ggplot(aes(
x = `% over 125,000`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $125,000 annually",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Households Above $125,000"
)
income_125_model <- lm(`% not completely at home` ~ `% over 125,000`, sj_ami_by_block)
summary(income_125_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% over 125,000`, data = sj_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.9783 -4.4021 -0.6648 3.8966 30.1129
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.09270 0.70606 85.11 <2e-16 ***
## `% over 125,000` -0.20695 0.01558 -13.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.048 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2376, Adjusted R-squared: 0.2362
## F-statistic: 176.3 on 1 and 566 DF, p-value: < 2.2e-16
# compare to pre shelter in place
sj_ami_by_block %>%
ggplot(aes(
x = `% over 75,000`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Staying at Home and Households Above 50% AMI Pre Shelter-in-Place"
)
income_75_model2 <- lm(`% not completely at home pre shelter` ~ `% over 75,000`, sj_ami_by_block)
summary(income_75_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% over 75,000`,
## data = sj_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.4357 -2.7003 -0.1437 2.7764 16.6680
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.61447 0.65712 110.504 < 2e-16 ***
## `% over 75,000` 0.08029 0.01013 7.926 1.21e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.386 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.09991, Adjusted R-squared: 0.09832
## F-statistic: 62.83 on 1 and 566 DF, p-value: 1.206e-14
# income - less than $100000
sj_ami_by_block %>%
ggplot(aes(
x = `% over 100,000`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Staying Home and Households Below 80% AMI Pre Shelter-in-Place"
)
income_100_model2 <- lm(`% not completely at home pre shelter` ~ `% over 100,000`, sj_ami_by_block)
summary(income_100_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% over 100,000`,
## data = sj_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.5034 -2.6406 0.0803 2.5599 16.9387
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.26132 0.51177 143.152 <2e-16 ***
## `% over 100,000` 0.08532 0.00938 9.096 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.319 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1275, Adjusted R-squared: 0.126
## F-statistic: 82.73 on 1 and 566 DF, p-value: < 2.2e-16
# over 125000
sj_ami_by_block %>%
ggplot(aes(
x = `% over 125,000`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $125,000 annually",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Social Distancing and Households Below $125,000 Pre Shelter-in-Place"
)
income_125_model2 <- lm(`% not completely at home pre shelter` ~ `% over 125,000`, sj_ami_by_block)
summary(income_125_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% over 125,000`,
## data = sj_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.353 -2.556 0.022 2.522 16.560
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.640242 0.425069 173.2 <2e-16 ***
## `% over 125,000` 0.096607 0.009382 10.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.243 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1578, Adjusted R-squared: 0.1563
## F-statistic: 106 on 1 and 566 DF, p-value: < 2.2e-16
# language
sj_lang_by_block %>%
ggplot(aes(
x = `% speaking english > well`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking English well",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and English Language Ability"
)
english_ability_model <- lm(`% not completely at home` ~ `% speaking english > well`, sj_lang_by_block)
summary(english_ability_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% speaking english > well`,
## data = sj_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.368 -4.739 -0.403 3.890 37.997
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.80735 3.24864 20.565 < 2e-16 ***
## `% speaking english > well` -0.17105 0.03642 -4.696 3.33e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.017 on 567 degrees of freedom
## Multiple R-squared: 0.03744, Adjusted R-squared: 0.03574
## F-statistic: 22.05 on 1 and 567 DF, p-value: 3.332e-06
sj_lang_by_block %>%
ggplot(aes(
x = `% not speaking spanish`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals not speaking Spanish",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Spanish Language Ability"
)
spanish_speaking_model <- lm(`% not completely at home` ~ `% not speaking spanish`, sj_lang_by_block)
summary(spanish_speaking_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% not speaking spanish`,
## data = sj_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.273 -4.268 -0.575 3.458 37.202
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.87866 1.26890 50.342 <2e-16 ***
## `% not speaking spanish` -0.15750 0.01581 -9.964 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.538 on 567 degrees of freedom
## Multiple R-squared: 0.149, Adjusted R-squared: 0.1475
## F-statistic: 99.28 on 1 and 567 DF, p-value: < 2.2e-16
# compare to pre shelter in place
sj_lang_by_block %>%
ggplot(aes(
x = `% speaking english > well`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking English well",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Staying at Home and English Language Ability Pre Shelter-in-Place"
)
english_ability_model2 <- lm(`% not completely at home pre shelter` ~ `% speaking english > well`, sj_lang_by_block)
summary(english_ability_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% speaking english > well`,
## data = sj_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.9364 -2.4342 0.0388 3.0316 12.3011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.84131 1.73337 35.100 <2e-16 ***
## `% speaking english > well` 0.18913 0.01943 9.732 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.277 on 567 degrees of freedom
## Multiple R-squared: 0.1431, Adjusted R-squared: 0.1416
## F-statistic: 94.7 on 1 and 567 DF, p-value: < 2.2e-16
sj_lang_by_block %>%
ggplot(aes(
x = `% not speaking spanish`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals not speaking Spanish",
y = "Percent devices leaving home on weekdays pre shelter-in-place",
title = "San Jose: Staying at Home and Spanish Language Ability Pre Shelter-in-Place"
)
spanish_speaking_model2 <- lm(`% not completely at home pre shelter` ~ `% not speaking spanish`, sj_lang_by_block)
summary(spanish_speaking_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% not speaking spanish`,
## data = sj_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.793 -2.540 -0.002 2.680 11.988
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.228200 0.726831 97.998 <2e-16 ***
## `% not speaking spanish` 0.082206 0.009054 9.079 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.318 on 567 degrees of freedom
## Multiple R-squared: 0.1269, Adjusted R-squared: 0.1254
## F-statistic: 82.43 on 1 and 567 DF, p-value: < 2.2e-16
# occupants per room
sj_occupants_per_room_by_block %>%
ggplot(aes(
x = `percent less than 1`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with 1 or fewer occupant per room",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Room Occupancy"
)
occupants_model <- lm(`% not completely at home` ~ `percent less than 1`, sj_occupants_per_room_by_block)
summary(occupants_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `percent less than 1`,
## data = sj_occupants_per_room_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.498 -4.774 -0.098 3.776 34.802
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.73449 2.84276 24.882 < 2e-16 ***
## `percent less than 1` -0.21236 0.03131 -6.783 2.97e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.762 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.07518, Adjusted R-squared: 0.07355
## F-statistic: 46.01 on 1 and 566 DF, p-value: 2.969e-11
# compare to pre shelter in place
sj_occupants_per_room_by_block %>%
ggplot(aes(
x = `percent less than 1`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with 1 or fewer occupant per room",
y = "Percent devices leaving home on weekdays pre shelter-in-place",
title = "San Jose: Staying at Home and Room Occupancy Pre Shelter-in-Place"
)
occupants_model2 <- lm(`% not completely at home pre shelter` ~ `percent less than 1`, sj_occupants_per_room_by_block)
summary(occupants_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent less than 1`,
## data = sj_occupants_per_room_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.3246 -2.6506 -0.2808 2.7536 17.0509
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.88485 1.57437 39.943 <2e-16 ***
## `percent less than 1` 0.16329 0.01734 9.418 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.299 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1355, Adjusted R-squared: 0.134
## F-statistic: 88.7 on 1 and 566 DF, p-value: < 2.2e-16
# vehicles
sj_vehicles_by_block %>%
ggplot(aes(
x = `vehicles per capita`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Number of vehicles per capita",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Vehicles Per Capita"
)
# vehicles - percent with no vehicles
sj_no_vehicles_by_block %>%
ggplot(aes(
x = `percent with vehicles`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with vehicles available",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Vehicle Availability"
)
vehicles_model <- lm(`% not completely at home` ~ `percent with vehicles`, sj_no_vehicles_by_block)
summary(vehicles_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `percent with vehicles`,
## data = sj_no_vehicles_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.480 -4.684 -0.322 4.556 37.220
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.22880 4.98157 14.700 < 2e-16 ***
## `percent with vehicles` -0.22749 0.05223 -4.356 1.57e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.94 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.03243, Adjusted R-squared: 0.03072
## F-statistic: 18.97 on 1 and 566 DF, p-value: 1.574e-05
# compare to pre shelter in place
sj_no_vehicles_by_block %>%
ggplot(aes(
x = `percent with vehicles`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with vehicles available",
y = "Percent devices leaving home on weekdays pre shelter-in-place",
title = "San Jose: Social Distancing and Vehicle Availability Pre Shelter-in-Place"
)
vehicles_model2 <- lm(`% not completely at home pre shelter` ~ `percent with vehicles`, sj_no_vehicles_by_block)
summary(vehicles_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent with vehicles`,
## data = sj_no_vehicles_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.5618 -2.9606 -0.0694 3.0006 12.6053
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.25942 2.83717 22.297 < 2e-16 ***
## `percent with vehicles` 0.15084 0.02975 5.071 5.37e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.522 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.04346, Adjusted R-squared: 0.04177
## F-statistic: 25.72 on 1 and 566 DF, p-value: 5.37e-07
I also consider education and internet access, based on previous research. ## Education
sj_education_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B15003)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "education level"), sep = "!!") %>%
mutate(`education level` = replace_na(`education level`, "total_educ")) %>% # if the education level field is NA, this corresponded to the total number in that blockgroup
spread(key = `education level`, value = estimate) %>%
mutate(`percent associates or higher` = (`Associate's degree` + `Bachelor's degree` + `Doctorate degree` + `Master's degree`)*100/total_educ, `percent less than associates` = 100-`percent associates or higher`) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
sj_education_by_block %>%
ggplot(aes(
x = `percent associates or higher`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people with an degree at Associate's level or higher",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Education"
)
educ_model <- lm(`% not completely at home` ~ `percent associates or higher`, sj_education_by_block)
summary(educ_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `percent associates or higher`,
## data = sj_education_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.467 -4.450 -0.851 3.367 42.741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.62394 0.80580 75.23 <2e-16 ***
## `percent associates or higher` -0.19065 0.01581 -12.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.29 on 567 degrees of freedom
## Multiple R-squared: 0.2041, Adjusted R-squared: 0.2027
## F-statistic: 145.4 on 1 and 567 DF, p-value: < 2.2e-16
# compare to pre shelter in place
sj_education_by_block %>%
ggplot(aes(
x = `percent associates or higher`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people with an degree at Associate's level or higher",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Social Distancing and Education Pre Shelter-in-Place"
)
educ_model2 <- lm(`% not completely at home pre shelter` ~ `percent associates or higher`, sj_education_by_block)
summary(educ_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent associates or higher`,
## data = sj_education_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.2224 -2.5458 0.0672 2.7859 13.9180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.547853 0.476189 154.45 <2e-16 ***
## `percent associates or higher` 0.086343 0.009344 9.24 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.308 on 567 degrees of freedom
## Multiple R-squared: 0.1309, Adjusted R-squared: 0.1293
## F-statistic: 85.39 on 1 and 567 DF, p-value: < 2.2e-16
sj_internet_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B28002)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "subscription", "type", "additional"), sep = "!!") %>%
filter(is.na(subscription) | (type == "Broadband such as cable, fiber optic or DSL") & is.na(additional)) %>%
mutate(type = replace_na(type, "total_num")) %>%
dplyr::select(blockgroup, type, estimate) %>%
spread(key = type, value = estimate) %>%
mutate(`percent high speed` = `Broadband such as cable, fiber optic or DSL`*100/total_num, `percent no high speed` = 100-`percent high speed`) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
sj_internet_by_block %>%
ggplot(aes(
x = `percent high speed`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with broadband such as cable, fiber optic or DSL",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and High Speed Internet"
)
internet_model <- lm(`% not completely at home` ~ `percent high speed`, sj_internet_by_block)
summary(internet_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `percent high speed`,
## data = sj_internet_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.898 -4.532 -0.338 3.751 37.696
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.35009 2.14773 33.687 <2e-16 ***
## `percent high speed` -0.25746 0.02634 -9.776 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.466 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1445, Adjusted R-squared: 0.1429
## F-statistic: 95.57 on 1 and 566 DF, p-value: < 2.2e-16
# compare to pre-shelter-in-place behavior
sj_internet_by_block %>%
ggplot(aes(
x = `percent high speed`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with broadband such as cable, fiber optic or DSL",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Social Distancing and High Speed Internet Pre Shelter-in-Place"
)
internet_model2 <- lm(`% not completely at home pre shelter` ~ `percent high speed`, sj_internet_by_block)
summary(internet_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `percent high speed`,
## data = sj_internet_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.5915 -2.8440 -0.1157 2.8288 16.8750
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.1067 1.2800 53.990 < 2e-16 ***
## `percent high speed` 0.1055 0.0157 6.719 4.48e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.449 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.07386, Adjusted R-squared: 0.07223
## F-statistic: 45.14 on 1 and 566 DF, p-value: 4.477e-11
# use LODES data
sj_sex_workers_by_block <- grab_lodes(
state = "ca",
year = 2017,
lodes_type = "rac",
job_type = "JT01",
segment = "S000",
state_part = "main",
agg_geo = "bg"
) %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
filter(!is.na(DISTRICTS)) %>%
dplyr::select(CS01, CS02, h_bg) %>%
rename(male = CS01, female = CS02, blockgroup = h_bg) %>%
mutate(total_workers = male + female, `% male workers` = male*100 / total_workers) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
sj_sex_workers_by_block %>%
ggplot(aes(
x = `% male workers`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of workers that are male",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Sex of Workers"
)
sex_workers_model <- lm(`% not completely at home` ~ `% male workers`, sj_sex_workers_by_block)
summary(sex_workers_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% male workers`, data = sj_sex_workers_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.279 -4.953 -0.340 4.285 34.274
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.4653 6.9208 9.893 <2e-16 ***
## `% male workers` -0.3133 0.1290 -2.430 0.0154 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.087 on 566 degrees of freedom
## Multiple R-squared: 0.01032, Adjusted R-squared: 0.008573
## F-statistic: 5.903 on 1 and 566 DF, p-value: 0.01543
# compare to pre-shelter-in-place behavior
sj_sex_workers_by_block %>%
ggplot(aes(
x = `% male workers`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of workers that are male",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Social Distancing and Sex of Workers Pre Shelter-in-Place"
)
sex_workers_model_2 <- lm(`% not completely at home pre shelter` ~ `% male workers`, sj_sex_workers_by_block)
summary(sex_workers_model_2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% male workers`,
## data = sj_sex_workers_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.5200 -3.1184 -0.1399 3.0954 12.4701
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78.61111 3.95446 19.879 <2e-16 ***
## `% male workers` -0.01836 0.07369 -0.249 0.803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.621 on 566 degrees of freedom
## Multiple R-squared: 0.0001096, Adjusted R-squared: -0.001657
## F-statistic: 0.06207 on 1 and 566 DF, p-value: 0.8033
This does not seem to be significant.
sj_race_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B02001)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "race", "specification"), sep = "!!") %>%
filter(is.na(specification) & !is.na(race)) %>%
dplyr::select(blockgroup, estimate, race) %>%
spread(key = race, value = estimate) %>%
mutate(total_race = `American Indian and Alaska Native alone` + `Asian alone` + `Black or African American alone` + `Native Hawaiian and Other Pacific Islander alone` + `Some other race alone` + `Two or more races` + `White alone`, `% white` = `White alone`*100/total_race, `% Asian` = `Asian alone`*100/total_race, `% black` = `Black or African American alone`*100/total_race) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# also get ethnicity data (hispanic/latino vs not)
sj_hisplat_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B03002)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "hisp/lat", "specification"), sep = "!!") %>%
filter(is.na(specification) & !is.na(`hisp/lat`)) %>%
dplyr::select(blockgroup, estimate, `hisp/lat`) %>%
spread(key = `hisp/lat`, value = estimate) %>%
mutate(`% non hispanic/latino` = `Not Hispanic or Latino`*100/(`Hispanic or Latino` + `Not Hispanic or Latino`))
# join with the race data
sj_race_by_block <- sj_race_by_block %>% left_join(sj_hisplat_by_block %>% dplyr::select(blockgroup, `% non hispanic/latino`))
# plotting
# percent white
sj_race_by_block %>%
ggplot(aes(
x = `% white`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are white",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and White Residents"
)
white_model <- lm(`% not completely at home` ~ `% white`, sj_race_by_block)
summary(white_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% white`, data = sj_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.244 -4.791 -0.064 4.373 35.195
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.13101 0.75798 66.14 <2e-16 ***
## `% white` 0.03490 0.01572 2.22 0.0268 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.136 on 567 degrees of freedom
## Multiple R-squared: 0.008617, Adjusted R-squared: 0.006868
## F-statistic: 4.928 on 1 and 567 DF, p-value: 0.02682
# compare to pre-shelter-in-place behavior
sj_race_by_block %>%
ggplot(aes(
x = `% white`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are white",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Social Distancing and White Residents Pre Shelter-in-Place"
)
white_model2 <- lm(`% not completely at home pre shelter` ~ `% white`, sj_race_by_block)
summary(white_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% white`,
## data = sj_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.2995 -2.8302 -0.0347 2.8574 12.5621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.834803 0.410114 182.473 < 2e-16 ***
## `% white` 0.064673 0.008506 7.603 1.21e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.402 on 567 degrees of freedom
## Multiple R-squared: 0.09251, Adjusted R-squared: 0.09091
## F-statistic: 57.8 on 1 and 567 DF, p-value: 1.209e-13
# percent Asian
sj_race_by_block %>%
ggplot(aes(
x = `% Asian`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are Asian",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Asian Residents"
)
asian_model <- lm(`% not completely at home` ~ `% Asian`, sj_race_by_block)
summary(asian_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% Asian`, data = sj_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.471 -4.820 -0.566 4.453 35.882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.63327 0.56462 100.30 <2e-16 ***
## `% Asian` -0.15404 0.01448 -10.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.461 on 567 degrees of freedom
## Multiple R-squared: 0.1663, Adjusted R-squared: 0.1648
## F-statistic: 113.1 on 1 and 567 DF, p-value: < 2.2e-16
# compare to pre-shelter-in-place behavior
sj_race_by_block %>%
ggplot(aes(
x = `% Asian`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are Asian",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Social Distancing and Asian Residents Pre Shelter-in-Place"
)
asian_model2 <- lm(`% not completely at home pre shelter` ~ `% Asian`, sj_race_by_block)
summary(asian_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% Asian`,
## data = sj_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.6447 -3.0068 0.0283 3.2005 12.1305
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78.069520 0.348967 223.72 <2e-16 ***
## `% Asian` -0.013871 0.008952 -1.55 0.122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.611 on 567 degrees of freedom
## Multiple R-squared: 0.004217, Adjusted R-squared: 0.00246
## F-statistic: 2.401 on 1 and 567 DF, p-value: 0.1218
# percent non hispanic/latino
sj_race_by_block %>%
ggplot(aes(
x = `% non hispanic/latino`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are not Hispanic or Latino",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Hispanic/Latino Residents"
)
hisp_model <- lm(`% not completely at home` ~ `% non hispanic/latino`, sj_race_by_block)
summary(hisp_model)
##
## Call:
## lm(formula = `% not completely at home` ~ `% non hispanic/latino`,
## data = sj_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.405 -4.560 -0.744 3.687 37.778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.51504 0.97785 63.93 <2e-16 ***
## `% non hispanic/latino` -0.15993 0.01364 -11.72 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.331 on 567 degrees of freedom
## Multiple R-squared: 0.1951, Adjusted R-squared: 0.1936
## F-statistic: 137.4 on 1 and 567 DF, p-value: < 2.2e-16
# compare to pre-shelter-in-place behavior
sj_race_by_block %>%
ggplot(aes(
x = `% non hispanic/latino`,
y = `% not completely at home pre shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are not Hispanic or Latino",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "San Jose: Social Distancing and Hispanic/Latino Residents Pre Shelter-in-Place"
)
hisp_model2 <- lm(`% not completely at home pre shelter` ~ `% non hispanic/latino`, sj_race_by_block)
summary(hisp_model2)
##
## Call:
## lm(formula = `% not completely at home pre shelter` ~ `% non hispanic/latino`,
## data = sj_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.6826 -2.6760 -0.0257 3.0167 16.7253
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.005160 0.581580 125.529 < 2e-16 ***
## `% non hispanic/latino` 0.067819 0.008115 8.357 4.97e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.36 on 567 degrees of freedom
## Multiple R-squared: 0.1097, Adjusted R-squared: 0.1081
## F-statistic: 69.85 on 1 and 567 DF, p-value: 4.967e-16
# multiple regression
modeltest <- lm(sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + sj_age_by_block$`percent less than 30` + sj_lang_by_block$`% speaking english > well` + sj_occupants_per_room_by_block$`percent less than 1`)
summary(modeltest)
##
## Call:
## lm(formula = sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` +
## sj_age_by_block$`percent less than 30` + sj_lang_by_block$`% speaking english > well` +
## sj_occupants_per_room_by_block$`percent less than 1`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.2655 -4.3384 -0.7492 3.6871 29.9600
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 51.357400 4.574598
## sj_ami_by_block$`% over 125,000` -0.236228 0.020629
## sj_age_by_block$`percent less than 30` 0.007948 0.040347
## sj_lang_by_block$`% speaking english > well` 0.145889 0.044201
## sj_occupants_per_room_by_block$`percent less than 1` -0.036662 0.044226
## t value Pr(>|t|)
## (Intercept) 11.227 < 2e-16 ***
## sj_ami_by_block$`% over 125,000` -11.451 < 2e-16 ***
## sj_age_by_block$`percent less than 30` 0.197 0.84391
## sj_lang_by_block$`% speaking english > well` 3.301 0.00103 **
## sj_occupants_per_room_by_block$`percent less than 1` -0.829 0.40747
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.995 on 563 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.253, Adjusted R-squared: 0.2477
## F-statistic: 47.67 on 4 and 563 DF, p-value: < 2.2e-16
educ_income_model <- lm(sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + sj_education_by_block$`percent associates or higher`)
summary(educ_income_model)
##
## Call:
## lm(formula = sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` +
## sj_education_by_block$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.6193 -4.4926 -0.8443 3.6379 31.3968
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 61.64422 0.79199
## sj_ami_by_block$`% over 125,000` -0.14437 0.02163
## sj_education_by_block$`percent associates or higher` -0.08741 0.02126
## t value Pr(>|t|)
## (Intercept) 77.835 < 2e-16 ***
## sj_ami_by_block$`% over 125,000` -6.675 5.92e-11 ***
## sj_education_by_block$`percent associates or higher` -4.112 4.50e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.951 on 565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2597, Adjusted R-squared: 0.2571
## F-statistic: 99.11 on 2 and 565 DF, p-value: < 2.2e-16
educ_income_model <- lm(sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + sj_internet_by_block$`percent high speed`)
summary(educ_income_model)
##
## Call:
## lm(formula = sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` +
## sj_internet_by_block$`percent high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.1458 -4.4058 -0.5434 3.8081 29.8800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.83584 2.19898 29.485 <2e-16
## sj_ami_by_block$`% over 125,000` -0.17672 0.02043 -8.649 <2e-16
## sj_internet_by_block$`percent high speed` -0.07421 0.03260 -2.277 0.0232
##
## (Intercept) ***
## sj_ami_by_block$`% over 125,000` ***
## sj_internet_by_block$`percent high speed` *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.022 on 565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2445, Adjusted R-squared: 0.2418
## F-statistic: 91.42 on 2 and 565 DF, p-value: < 2.2e-16
income_spanish_model <- lm(sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` + sj_lang_by_block$`% not speaking spanish`)
summary(income_spanish_model)
##
## Call:
## lm(formula = sj_ami_by_block$`% not completely at home` ~ sj_ami_by_block$`% over 125,000` +
## sj_lang_by_block$`% not speaking spanish`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.1271 -4.4548 -0.7803 3.5867 29.8008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.77452 1.18514 52.968 < 2e-16
## sj_ami_by_block$`% over 125,000` -0.17229 0.01981 -8.698 < 2e-16
## sj_lang_by_block$`% not speaking spanish` -0.05281 0.01881 -2.808 0.00515
##
## (Intercept) ***
## sj_ami_by_block$`% over 125,000` ***
## sj_lang_by_block$`% not speaking spanish` **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.006 on 565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.248, Adjusted R-squared: 0.2454
## F-statistic: 93.19 on 2 and 565 DF, p-value: < 2.2e-16
This suggests that once controlling for income, Spanish language ability is no longer a strong predictor of leaving home during the shelter-in-place order.
Now I consider looking at correlations with the change in percent of devices staying completely at home since shelter-in-place started relative to the pre-shelter-in-place levels. I plot the change in percentage staying completely at home, and show linear fitting models for the change in percent staying at home, as well as the fractional increase in percent staying home.
# collect the demographic variables
sj_dem_distancing <- sj_internet_by_block %>%
dplyr::select(`percent high speed`, `% not completely at home`, `% Completely at Home`, blockgroup) %>%
left_join(sj_education_by_block %>% dplyr::select(blockgroup, `percent associates or higher`)) %>%
left_join(sj_ami_by_block %>% dplyr::select(blockgroup, `% over 125,000`)) %>%
left_join(sj_ami_by_block %>% dplyr::select(blockgroup, `% over 100,000`)) %>%
left_join(sj_ami_by_block %>% dplyr::select(blockgroup, `% over 75,000`)) %>%
left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent less than 30`)) %>%
left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent elderly`)) %>%
left_join(sj_lang_by_block %>% dplyr::select(blockgroup, `% not speaking spanish`)) %>%
left_join(sj_lang_by_block %>% dplyr::select(blockgroup, `% speaking english > well`)) %>%
left_join(sj_no_vehicles_by_block %>% dplyr::select(blockgroup, `percent with vehicles`)) %>%
left_join(sj_occupants_per_room_by_block %>% dplyr::select(blockgroup, `percent less than 1`)) %>%
left_join(sj_sex_workers_by_block %>% dplyr::select(blockgroup, `% male workers`)) %>%
left_join(sj_race_by_block %>% dplyr::select(blockgroup, `% white`, `% Asian`, `% non hispanic/latino`)) %>%
left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent less than 18`)) %>%
left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent 20-29`))
sj_dem_distancing_pre_post <- sj_dem_distancing %>%
left_join(sj_internet_by_block %>% dplyr::select(`% not completely at home pre shelter`, `% Completely at Home Pre Shelter`, blockgroup)) %>%
mutate(`% increase in staying completely home` = `% Completely at Home` - `% Completely at Home Pre Shelter`, frac_increase = `% increase in staying completely home`/`% Completely at Home Pre Shelter`) %>%
mutate(`% hispanic/latino` = (100 - `% non hispanic/latino`))
sj_dem_distancing[is.na(sj_dem_distancing)] <- 0
sj_dem_distancing_pre_post[is.na(sj_dem_distancing_pre_post)] <- 0
saveRDS(sj_dem_distancing_pre_post, "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/sj_socialdistancing_demdata_prepostdifs_manyvars.rds")
# sj_dem_distancing_pre_post <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/sj_socialdistancing_demdata_prepostdifs_manyvars.rds")
# age
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent less than 30`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 30",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Young Age Groups"
)
young_model_dif <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_dif)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.371 -5.352 -0.198 5.401 30.989
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 36.97144 1.67774 22.036
## sj_dem_distancing_pre_post$`percent less than 30` -0.28614 0.04253 -6.728
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## sj_dem_distancing_pre_post$`percent less than 30` 4.21e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.199 on 567 degrees of freedom
## Multiple R-squared: 0.07394, Adjusted R-squared: 0.0723
## F-statistic: 45.27 on 1 and 567 DF, p-value: 4.209e-11
young_model_frac <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_frac)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0404 -0.3861 -0.1013 0.2908 2.8689
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 2.080149 0.116428 17.866
## sj_dem_distancing_pre_post$`percent less than 30` -0.021266 0.002951 -7.205
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## sj_dem_distancing_pre_post$`percent less than 30` 1.85e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6384 on 567 degrees of freedom
## Multiple R-squared: 0.08389, Adjusted R-squared: 0.08227
## F-statistic: 51.92 on 1 and 567 DF, p-value: 1.852e-12
sj_dem_distancing_pre_post %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
ggplot(aes(
x = `percent elderly`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents 65 and older",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Elderly Population"
)
elderly_model_dif <- lm(`% increase in staying completely home` ~ `percent elderly`, sj_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent elderly`,
## data = sj_dem_distancing_pre_post %>% filter(`percent elderly` <
## 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.162 -5.596 -0.409 5.698 31.761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.08337 0.87415 25.263 < 2e-16 ***
## `percent elderly` 0.30467 0.06036 5.048 6.05e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.334 on 564 degrees of freedom
## Multiple R-squared: 0.04322, Adjusted R-squared: 0.04153
## F-statistic: 25.48 on 1 and 564 DF, p-value: 6.046e-07
elderly_model_frac <- lm(frac_increase ~ `percent elderly`, sj_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent elderly`, data = sj_dem_distancing_pre_post %>%
## filter(`percent elderly` < 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.58615 -0.41623 -0.09599 0.30691 2.91590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.946092 0.060638 15.602 < 2e-16 ***
## `percent elderly` 0.024725 0.004187 5.905 6.09e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6475 on 564 degrees of freedom
## Multiple R-squared: 0.05823, Adjusted R-squared: 0.05656
## F-statistic: 34.87 on 1 and 564 DF, p-value: 6.087e-09
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent less than 18`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 18",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Child Population"
)
child_model_dif <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`percent less than 18`)
summary(child_model_dif)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`percent less than 18`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.813 -5.942 -0.159 5.855 30.740
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 21.86853 1.37778 15.872
## sj_dem_distancing_pre_post$`percent less than 18` 0.18322 0.05871 3.121
## Pr(>|t|)
## (Intercept) <2e-16 ***
## sj_dem_distancing_pre_post$`percent less than 18` 0.0019 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.478 on 567 degrees of freedom
## Multiple R-squared: 0.01689, Adjusted R-squared: 0.01515
## F-statistic: 9.739 on 1 and 567 DF, p-value: 0.001895
child_model_frac <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent less than 18`)
summary(child_model_frac)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent less than 18`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6495 -0.4660 -0.1078 0.3188 2.7406
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.963196 0.096052 10.028
## sj_dem_distancing_pre_post$`percent less than 18` 0.013373 0.004093 3.267
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## sj_dem_distancing_pre_post$`percent less than 18` 0.00115 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6608 on 567 degrees of freedom
## Multiple R-squared: 0.01848, Adjusted R-squared: 0.01675
## F-statistic: 10.68 on 1 and 567 DF, p-value: 0.001151
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent 20-29`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents ages 20-29",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Young Adult Population"
)
young_adult_model_dif <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`percent 20-29`)
summary(young_adult_model_dif)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`percent 20-29`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.3972 -5.2744 -0.0801 5.2712 31.1251
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.99720 0.73528 43.517 <2e-16
## sj_dem_distancing_pre_post$`percent 20-29` -0.43367 0.04574 -9.481 <2e-16
##
## (Intercept) ***
## sj_dem_distancing_pre_post$`percent 20-29` ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.881 on 567 degrees of freedom
## Multiple R-squared: 0.1368, Adjusted R-squared: 0.1353
## F-statistic: 89.89 on 1 and 567 DF, p-value: < 2.2e-16
young_adult_model_frac <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent 20-29`)
summary(young_adult_model_frac)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`percent 20-29`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.67805 -0.39960 -0.08361 0.29977 2.61537
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.717783 0.050598 33.95
## sj_dem_distancing_pre_post$`percent 20-29` -0.032757 0.003147 -10.41
## Pr(>|t|)
## (Intercept) <2e-16 ***
## sj_dem_distancing_pre_post$`percent 20-29` <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6112 on 567 degrees of freedom
## Multiple R-squared: 0.1604, Adjusted R-squared: 0.1589
## F-statistic: 108.3 on 1 and 567 DF, p-value: < 2.2e-16
Here I look at each age bracket individually and see the effect size.
sj_all_age_by_block <- sj_all_age_by_block %>%
left_join(sj_dem_distancing_pre_post %>% dplyr::select(blockgroup, `% increase in staying completely home`)) %>%
mutate(`% 80 and older` = (`80 to 84 years` + `85 years and over`)*100/total)
for (i in 2:(ncol(sj_all_age_by_block)-3)) {
colName <- colnames(sj_all_age_by_block)[i]
columnToUse <- sj_all_age_by_block %>% dplyr::select(blockgroup, colName, total)
percent_vals <- (columnToUse[,2]*100)/columnToUse$total
print(colName)
age_bracket_model <- lm(sj_all_age_by_block$`% increase in staying completely home` ~ percent_vals[,1])
print(summary(age_bracket_model))
}
## [1] "10 to 14 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.989 -5.772 -0.251 6.106 31.107
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.6945 0.8456 26.839 < 2e-16 ***
## percent_vals[, 1] 0.5097 0.1159 4.399 1.3e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.4 on 567 degrees of freedom
## Multiple R-squared: 0.033, Adjusted R-squared: 0.0313
## F-statistic: 19.35 on 1 and 567 DF, p-value: 1.299e-05
##
## [1] "15 to 17 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.590 -5.703 -0.299 6.025 28.762
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.9548 0.7319 32.729 < 2e-16 ***
## percent_vals[, 1] 0.5440 0.1647 3.303 0.00102 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.469 on 567 degrees of freedom
## Multiple R-squared: 0.01887, Adjusted R-squared: 0.01714
## F-statistic: 10.91 on 1 and 567 DF, p-value: 0.001018
##
## [1] "18 and 19 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.983 -5.726 -0.313 5.555 32.183
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.9832 0.5096 52.947 < 2e-16 ***
## percent_vals[, 1] -0.4841 0.1549 -3.126 0.00186 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.478 on 567 degrees of freedom
## Multiple R-squared: 0.01694, Adjusted R-squared: 0.01521
## F-statistic: 9.772 on 1 and 567 DF, p-value: 0.001863
##
## [1] "20 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.037 -5.937 -0.337 5.844 30.463
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.0367 0.4878 55.42 < 2e-16 ***
## percent_vals[, 1] -0.8129 0.2203 -3.69 0.000246 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.447 on 567 degrees of freedom
## Multiple R-squared: 0.02346, Adjusted R-squared: 0.02173
## F-statistic: 13.62 on 1 and 567 DF, p-value: 0.0002455
##
## [1] "21 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.799 -6.011 -0.260 5.586 31.872
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.7094 0.4820 57.49 < 2e-16 ***
## percent_vals[, 1] -1.3987 0.2316 -6.04 2.78e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.266 on 567 degrees of freedom
## Multiple R-squared: 0.06046, Adjusted R-squared: 0.0588
## F-statistic: 36.49 on 1 and 567 DF, p-value: 2.784e-09
##
## [1] "22 to 24 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.392 -5.787 -0.292 5.508 30.849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.2916 0.6200 47.242 < 2e-16 ***
## percent_vals[, 1] -0.8348 0.1227 -6.806 2.56e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.191 on 567 degrees of freedom
## Multiple R-squared: 0.07552, Adjusted R-squared: 0.07389
## F-statistic: 46.32 on 1 and 567 DF, p-value: 2.562e-11
##
## [1] "25 to 29 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.380 -5.172 -0.203 5.482 34.778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.54566 0.65504 45.105 < 2e-16 ***
## percent_vals[, 1] -0.48262 0.07178 -6.724 4.33e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.2 on 567 degrees of freedom
## Multiple R-squared: 0.07385, Adjusted R-squared: 0.07221
## F-statistic: 45.21 on 1 and 567 DF, p-value: 4.327e-11
##
## [1] "30 to 34 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.169 -5.719 0.054 6.108 28.876
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.62409 0.76260 37.535 < 2e-16 ***
## percent_vals[, 1] -0.35981 0.08895 -4.045 5.96e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.424 on 567 degrees of freedom
## Multiple R-squared: 0.02805, Adjusted R-squared: 0.02633
## F-statistic: 16.36 on 1 and 567 DF, p-value: 5.956e-05
##
## [1] "35 to 39 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.178 -5.931 -0.342 5.886 30.945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.1001 0.9283 29.19 <2e-16 ***
## percent_vals[, 1] -0.1541 0.1158 -1.33 0.184
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.545 on 567 degrees of freedom
## Multiple R-squared: 0.003112, Adjusted R-squared: 0.001354
## F-statistic: 1.77 on 1 and 567 DF, p-value: 0.1839
##
## [1] "40 to 44 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.904 -5.864 -0.229 5.725 29.702
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.0449 0.9790 24.560 <2e-16 ***
## percent_vals[, 1] 0.2690 0.1239 2.171 0.0304 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.52 on 567 degrees of freedom
## Multiple R-squared: 0.008243, Adjusted R-squared: 0.006494
## F-statistic: 4.713 on 1 and 567 DF, p-value: 0.03036
##
## [1] "45 to 49 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.833 -5.939 -0.224 5.619 29.100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.4085 0.9112 24.593 < 2e-16 ***
## percent_vals[, 1] 0.4955 0.1138 4.354 1.58e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.404 on 567 degrees of freedom
## Multiple R-squared: 0.03236, Adjusted R-squared: 0.03065
## F-statistic: 18.96 on 1 and 567 DF, p-value: 1.584e-05
##
## [1] "5 to 9 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.107 -5.944 -0.152 5.796 30.971
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.1977 0.8763 27.615 <2e-16 ***
## percent_vals[, 1] 0.2914 0.1272 2.292 0.0223 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.515 on 567 degrees of freedom
## Multiple R-squared: 0.009177, Adjusted R-squared: 0.00743
## F-statistic: 5.252 on 1 and 567 DF, p-value: 0.02229
##
## [1] "50 to 54 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.426 -5.480 -0.427 5.792 27.931
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.3476 0.8544 26.157 < 2e-16 ***
## percent_vals[, 1] 0.5074 0.1058 4.795 2.08e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.371 on 567 degrees of freedom
## Multiple R-squared: 0.03897, Adjusted R-squared: 0.03728
## F-statistic: 22.99 on 1 and 567 DF, p-value: 2.079e-06
##
## [1] "55 to 59 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.321 -5.867 -0.114 5.616 31.636
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.4962 0.8143 30.083 <2e-16 ***
## percent_vals[, 1] 0.2297 0.1095 2.099 0.0363 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.523 on 567 degrees of freedom
## Multiple R-squared: 0.007707, Adjusted R-squared: 0.005957
## F-statistic: 4.404 on 1 and 567 DF, p-value: 0.0363
##
## [1] "60 and 61 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.186 -6.006 -0.248 5.744 31.983
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.1857 0.6689 37.654 <2e-16 ***
## percent_vals[, 1] 0.3240 0.2172 1.492 0.136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.541 on 567 degrees of freedom
## Multiple R-squared: 0.003911, Adjusted R-squared: 0.002154
## F-statistic: 2.226 on 1 and 567 DF, p-value: 0.1363
##
## [1] "62 to 64 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.708 -6.026 -0.223 5.733 32.871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.2843 0.7159 33.924 < 2e-16 ***
## percent_vals[, 1] 0.5286 0.1849 2.859 0.00441 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.491 on 567 degrees of freedom
## Multiple R-squared: 0.01421, Adjusted R-squared: 0.01247
## F-statistic: 8.173 on 1 and 567 DF, p-value: 0.004407
##
## [1] "65 and 66 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.955 -5.925 -0.190 5.921 32.001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.4986 0.6466 39.434 <2e-16 ***
## percent_vals[, 1] 0.2541 0.2649 0.959 0.338
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.552 on 567 degrees of freedom
## Multiple R-squared: 0.00162, Adjusted R-squared: -0.0001409
## F-statistic: 0.92 on 1 and 567 DF, p-value: 0.3379
##
## [1] "67 to 69 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.865 -5.674 -0.273 5.697 32.543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.5857 0.6293 39.066 < 2e-16 ***
## percent_vals[, 1] 0.5700 0.1985 2.871 0.00425 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.491 on 567 degrees of freedom
## Multiple R-squared: 0.01433, Adjusted R-squared: 0.01259
## F-statistic: 8.242 on 1 and 567 DF, p-value: 0.004246
##
## [1] "70 to 74 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.884 -5.820 -0.349 5.807 31.850
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.2424 0.6412 39.367 <2e-16 ***
## percent_vals[, 1] 0.2305 0.1554 1.483 0.139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.541 on 567 degrees of freedom
## Multiple R-squared: 0.003863, Adjusted R-squared: 0.002106
## F-statistic: 2.199 on 1 and 567 DF, p-value: 0.1387
##
## [1] "75 to 79 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.139 -5.968 -0.135 5.853 31.264
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.1390 0.5502 45.694 <2e-16 ***
## percent_vals[, 1] 0.3468 0.1552 2.235 0.0258 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.518 on 567 degrees of freedom
## Multiple R-squared: 0.008733, Adjusted R-squared: 0.006984
## F-statistic: 4.995 on 1 and 567 DF, p-value: 0.02581
##
## [1] "80 to 84 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.332 -5.475 0.005 5.789 28.268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.3324 0.5325 45.698 < 2e-16 ***
## percent_vals[, 1] 1.0399 0.2257 4.608 5.02e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.385 on 567 degrees of freedom
## Multiple R-squared: 0.0361, Adjusted R-squared: 0.0344
## F-statistic: 21.24 on 1 and 567 DF, p-value: 5.022e-06
##
## [1] "85 years and over"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.873 -5.887 -0.173 5.869 31.569
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.87296 0.51524 50.216 <2e-16 ***
## percent_vals[, 1] 0.06881 0.19787 0.348 0.728
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.558 on 567 degrees of freedom
## Multiple R-squared: 0.0002132, Adjusted R-squared: -0.00155
## F-statistic: 0.1209 on 1 and 567 DF, p-value: 0.7282
##
## [1] "Under 5 years"
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## percent_vals[, 1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.346 -5.684 0.088 5.939 29.968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.9474 0.8369 33.393 < 2e-16 ***
## percent_vals[, 1] -0.3192 0.1198 -2.665 0.00791 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.5 on 567 degrees of freedom
## Multiple R-squared: 0.01237, Adjusted R-squared: 0.01063
## F-statistic: 7.103 on 1 and 567 DF, p-value: 0.007914
summary(lm(sj_all_age_by_block$`% increase in staying completely home` ~ sj_all_age_by_block$`% 80 and older`))
##
## Call:
## lm(formula = sj_all_age_by_block$`% increase in staying completely home` ~
## sj_all_age_by_block$`% 80 and older`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.892 -5.836 -0.180 5.700 30.432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.8917 0.5666 43.930 < 2e-16 ***
## sj_all_age_by_block$`% 80 and older` 0.3390 0.1249 2.714 0.00686 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.498 on 567 degrees of freedom
## Multiple R-squared: 0.01282, Adjusted R-squared: 0.01108
## F-statistic: 7.364 on 1 and 567 DF, p-value: 0.006858
# income - less than $75000
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% over 75,000`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Households Above 50% AMI"
)
income_75_model_dif <- lm(`% increase in staying completely home` ~ `% over 75,000`, sj_dem_distancing_pre_post)
summary(income_75_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 75,000`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.248 -4.180 0.456 4.620 24.878
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.10256 1.18011 6.866 1.74e-11 ***
## `% over 75,000` 0.28765 0.01821 15.799 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.966 on 567 degrees of freedom
## Multiple R-squared: 0.3057, Adjusted R-squared: 0.3044
## F-statistic: 249.6 on 1 and 567 DF, p-value: < 2.2e-16
income_75_model_frac <- lm(frac_increase ~ `% over 75,000`, sj_dem_distancing_pre_post)
summary(income_75_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% over 75,000`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.72742 -0.30605 -0.04073 0.29059 2.51466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.064774 0.083717 0.774 0.439
## `% over 75,000` 0.019284 0.001292 14.931 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5651 on 567 degrees of freedom
## Multiple R-squared: 0.2822, Adjusted R-squared: 0.281
## F-statistic: 222.9 on 1 and 567 DF, p-value: < 2.2e-16
# income - less than $100000
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% over 100,000`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Households Below 80% AMI"
)
income_100_model_dif <- lm(`% increase in staying completely home` ~ `% over 100,000`, sj_dem_distancing_pre_post)
summary(income_100_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 100,000`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.603 -4.175 0.734 5.018 21.595
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.25846 0.90960 12.38 <2e-16 ***
## `% over 100,000` 0.28914 0.01669 17.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.73 on 567 degrees of freedom
## Multiple R-squared: 0.3462, Adjusted R-squared: 0.3451
## F-statistic: 300.2 on 1 and 567 DF, p-value: < 2.2e-16
income_100_model_frac <- lm(frac_increase ~ `% over 100,000`, sj_dem_distancing_pre_post)
summary(income_100_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% over 100,000`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.82682 -0.31981 -0.01839 0.26521 2.68925
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.254910 0.064069 3.979 7.83e-05 ***
## `% over 100,000` 0.019805 0.001175 16.851 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5444 on 567 degrees of freedom
## Multiple R-squared: 0.3337, Adjusted R-squared: 0.3325
## F-statistic: 283.9 on 1 and 567 DF, p-value: < 2.2e-16
# income - less than $125000
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% over 125,000`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $125,000 annually",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Households Below $125,000"
)
income_125_model_dif <- lm(`% increase in staying completely home` ~ `% over 125,000`, sj_dem_distancing_pre_post)
summary(income_125_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 125,000`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.465 -3.935 0.927 4.907 21.255
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.38710 0.75357 17.77 <2e-16 ***
## `% over 125,000` 0.30678 0.01665 18.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.56 on 567 degrees of freedom
## Multiple R-squared: 0.3746, Adjusted R-squared: 0.3735
## F-statistic: 339.6 on 1 and 567 DF, p-value: < 2.2e-16
income_125_model_frac <- lm(frac_increase ~ `% over 125,000`, sj_dem_distancing_pre_post)
summary(income_125_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% over 125,000`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.90642 -0.29574 -0.00501 0.25612 2.57094
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.374004 0.052192 7.166 2.42e-12 ***
## `% over 125,000` 0.021664 0.001153 18.789 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5236 on 567 degrees of freedom
## Multiple R-squared: 0.3837, Adjusted R-squared: 0.3826
## F-statistic: 353 on 1 and 567 DF, p-value: < 2.2e-16
# language
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% speaking english > well`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking English well",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and English Language Ability"
)
english_ability_model_dif <- lm(`% increase in staying completely home` ~ `% speaking english > well`, sj_dem_distancing_pre_post)
summary(english_ability_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% speaking english > well`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.594 -4.552 0.305 5.103 30.299
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.96604 3.63137 -1.643 0.101
## `% speaking english > well` 0.36018 0.04072 8.846 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.961 on 567 degrees of freedom
## Multiple R-squared: 0.1213, Adjusted R-squared: 0.1197
## F-statistic: 78.26 on 1 and 567 DF, p-value: < 2.2e-16
english_ability_model_frac <- lm(frac_increase ~ `% speaking english > well`, sj_dem_distancing_pre_post)
summary(english_ability_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% speaking english > well`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.55871 -0.35286 -0.03063 0.28273 2.70000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.366510 0.246418 -5.546 4.5e-08 ***
## `% speaking english > well` 0.029650 0.002763 10.731 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6081 on 567 degrees of freedom
## Multiple R-squared: 0.1688, Adjusted R-squared: 0.1674
## F-statistic: 115.2 on 1 and 567 DF, p-value: < 2.2e-16
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% not speaking spanish`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals not speaking Spanish",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Spanish Language Ability"
)
spanish_speaking_model_dif <- lm(`% increase in staying completely home` ~ `% not speaking spanish`, sj_dem_distancing_pre_post)
summary(spanish_speaking_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% not speaking spanish`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.456 -3.586 0.750 5.080 26.180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.34954 1.39163 5.281 1.83e-07 ***
## `% not speaking spanish` 0.23971 0.01734 13.827 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.267 on 567 degrees of freedom
## Multiple R-squared: 0.2522, Adjusted R-squared: 0.2508
## F-statistic: 191.2 on 1 and 567 DF, p-value: < 2.2e-16
spanish_speaking_model_frac <- lm(frac_increase ~ `% not speaking spanish`, sj_dem_distancing_pre_post)
summary(spanish_speaking_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% not speaking spanish`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.62776 -0.31980 -0.02877 0.27021 2.47133
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.043619 0.096919 -0.45 0.653
## `% not speaking spanish` 0.016815 0.001207 13.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5757 on 567 degrees of freedom
## Multiple R-squared: 0.2549, Adjusted R-squared: 0.2536
## F-statistic: 194 on 1 and 567 DF, p-value: < 2.2e-16
# occupants per room
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent less than 1`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with 1 or fewer occupant per room",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Room Occupancy"
)
occupants_model_dif <- lm(`% increase in staying completely home` ~ `percent less than 1`, sj_dem_distancing_pre_post)
summary(occupants_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent less than 1`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.715 -4.498 0.298 5.055 27.842
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.22883 2.97184 -2.432 0.0153 *
## `percent less than 1` 0.36886 0.03276 11.260 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.642 on 567 degrees of freedom
## Multiple R-squared: 0.1828, Adjusted R-squared: 0.1813
## F-statistic: 126.8 on 1 and 567 DF, p-value: < 2.2e-16
occupants_model_frac <- lm(frac_increase ~ `percent less than 1`, sj_dem_distancing_pre_post)
summary(occupants_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent less than 1`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5701 -0.3835 -0.0605 0.2602 2.5787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.149711 0.205380 -5.598 3.38e-08 ***
## `percent less than 1` 0.026802 0.002264 11.839 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5972 on 567 degrees of freedom
## Multiple R-squared: 0.1982, Adjusted R-squared: 0.1968
## F-statistic: 140.2 on 1 and 567 DF, p-value: < 2.2e-16
# vehicles - percent with no vehicles
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent with vehicles`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with vehicles available",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Vehicle Availability"
)
vehicles_model_dif <- lm(`% increase in staying completely home` ~ `percent with vehicles`, sj_dem_distancing_pre_post)
summary(vehicles_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent with vehicles`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.787 -5.768 -0.271 5.232 29.732
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.88779 4.87608 -1.618 0.106
## `percent with vehicles` 0.35656 0.05117 6.969 8.92e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.175 on 567 degrees of freedom
## Multiple R-squared: 0.07889, Adjusted R-squared: 0.07726
## F-statistic: 48.56 on 1 and 567 DF, p-value: 8.921e-12
vehicles_model_frac <- lm(frac_increase ~ `percent with vehicles`, sj_dem_distancing_pre_post)
summary(vehicles_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent with vehicles`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7224 -0.4244 -0.1149 0.3004 2.8641
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.906858 0.342485 -2.648 0.00833 **
## `percent with vehicles` 0.022848 0.003594 6.357 4.22e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6444 on 567 degrees of freedom
## Multiple R-squared: 0.06654, Adjusted R-squared: 0.06489
## F-statistic: 40.42 on 1 and 567 DF, p-value: 4.222e-10
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent associates or higher`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people with an degree at Associate's level or higher",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Education"
)
educ_model_dif <- lm(`% increase in staying completely home` ~ `percent associates or higher`, sj_dem_distancing_pre_post)
summary(educ_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent associates or higher`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.024 -3.195 0.959 4.990 22.843
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.92391 0.87475 14.77 <2e-16 ***
## `percent associates or higher` 0.27700 0.01716 16.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.913 on 567 degrees of freedom
## Multiple R-squared: 0.3147, Adjusted R-squared: 0.3135
## F-statistic: 260.4 on 1 and 567 DF, p-value: < 2.2e-16
educ_model_frac <- lm(frac_increase ~ `percent associates or higher`, sj_dem_distancing_pre_post)
summary(educ_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent associates or higher`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.24512 -0.29235 -0.00326 0.28250 2.55065
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.352492 0.061037 5.775 1.27e-08 ***
## `percent associates or higher` 0.019324 0.001198 16.134 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5522 on 567 degrees of freedom
## Multiple R-squared: 0.3146, Adjusted R-squared: 0.3134
## F-statistic: 260.3 on 1 and 567 DF, p-value: < 2.2e-16
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent high speed`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with broadband such as cable, fiber optic or DSL",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and High Speed Internet"
)
internet_model_dif <- lm(`% increase in staying completely home` ~ `percent high speed`, sj_dem_distancing_pre_post)
summary(internet_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent high speed`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.275 -4.808 0.533 5.350 27.409
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.19418 2.33393 -1.369 0.172
## `percent high speed` 0.36232 0.02864 12.649 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.442 on 567 degrees of freedom
## Multiple R-squared: 0.2201, Adjusted R-squared: 0.2187
## F-statistic: 160 on 1 and 567 DF, p-value: < 2.2e-16
internet_model_frac <- lm(frac_increase ~ `percent high speed`, sj_dem_distancing_pre_post)
summary(internet_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent high speed`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.68581 -0.33803 -0.07178 0.25763 2.56825
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.647258 0.165554 -3.91 0.000104 ***
## `percent high speed` 0.023728 0.002032 11.68 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5988 on 567 degrees of freedom
## Multiple R-squared: 0.1939, Adjusted R-squared: 0.1925
## F-statistic: 136.4 on 1 and 567 DF, p-value: < 2.2e-16
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% male workers`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of workers that are male",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Sex of Workers"
)
sex_workers_model_dif <- lm(`% increase in staying completely home` ~ `% male workers`, sj_dem_distancing_pre_post)
summary(sex_workers_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% male workers`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.145 -5.935 -0.211 5.905 31.467
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.95036 6.21488 3.854 0.00013 ***
## `% male workers` 0.03804 0.11591 0.328 0.74291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.559 on 567 degrees of freedom
## Multiple R-squared: 0.0001899, Adjusted R-squared: -0.001573
## F-statistic: 0.1077 on 1 and 567 DF, p-value: 0.7429
sex_workers_model_frac <- lm(frac_increase ~ `% male workers`, sj_dem_distancing_pre_post)
summary(sex_workers_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% male workers`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7784 -0.4487 -0.1048 0.3191 2.7785
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.711352 0.433040 1.643 0.101
## `% male workers` 0.010323 0.008076 1.278 0.202
##
## Residual standard error: 0.666 on 567 degrees of freedom
## Multiple R-squared: 0.002873, Adjusted R-squared: 0.001114
## F-statistic: 1.634 on 1 and 567 DF, p-value: 0.2017
# white
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% white`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are white",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and White Residents"
)
white_model_dif <- lm(`% increase in staying completely home` ~ `% white`, sj_dem_distancing_pre_post)
summary(white_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% white`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.933 -5.793 -0.369 5.563 32.323
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.70379 0.88859 27.801 <2e-16 ***
## `% white` 0.02977 0.01843 1.615 0.107
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.538 on 567 degrees of freedom
## Multiple R-squared: 0.004581, Adjusted R-squared: 0.002825
## F-statistic: 2.609 on 1 and 567 DF, p-value: 0.1068
white_model_frac <- lm(frac_increase ~ `% white`, sj_dem_distancing_pre_post)
summary(white_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% white`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.72422 -0.41988 -0.09149 0.30784 2.86971
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.998632 0.060878 16.404 < 2e-16 ***
## `% white` 0.006156 0.001263 4.875 1.41e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6534 on 567 degrees of freedom
## Multiple R-squared: 0.04024, Adjusted R-squared: 0.03854
## F-statistic: 23.77 on 1 and 567 DF, p-value: 1.412e-06
# asian
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% Asian`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are Asian",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Asian Residents"
)
asian_model_dif <- lm(`% increase in staying completely home` ~ `% Asian`, sj_dem_distancing_pre_post)
summary(asian_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% Asian`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.878 -5.446 -0.278 5.730 25.557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.4363 0.6861 31.244 < 2e-16 ***
## `% Asian` 0.1402 0.0176 7.964 9.14e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.066 on 567 degrees of freedom
## Multiple R-squared: 0.1006, Adjusted R-squared: 0.09903
## F-statistic: 63.43 on 1 and 567 DF, p-value: 9.139e-15
asian_model_frac <- lm(frac_increase ~ `% Asian`, sj_dem_distancing_pre_post)
summary(asian_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% Asian`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7310 -0.4560 -0.1234 0.3023 2.8051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.092511 0.049733 21.968 < 2e-16 ***
## `% Asian` 0.005274 0.001276 4.134 4.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6571 on 567 degrees of freedom
## Multiple R-squared: 0.02926, Adjusted R-squared: 0.02755
## F-statistic: 17.09 on 1 and 567 DF, p-value: 4.099e-05
# hispanic/latino
sj_dem_distancing_pre_post %>%
ggplot(aes(
x = `% non hispanic/latino`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are not Hispanic or Latino",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "San Jose: Social Distancing and Hispanic/Latino Residents"
)
hisp_model_dif <- lm(`% increase in staying completely home` ~ `% non hispanic/latino`, sj_dem_distancing_pre_post)
summary(hisp_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% non hispanic/latino`,
## data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.292 -3.765 0.875 5.012 24.425
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.4901 1.0752 9.757 <2e-16 ***
## `% non hispanic/latino` 0.2278 0.0150 15.181 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.061 on 567 degrees of freedom
## Multiple R-squared: 0.289, Adjusted R-squared: 0.2877
## F-statistic: 230.5 on 1 and 567 DF, p-value: < 2.2e-16
hisp_model_frac <- lm(frac_increase ~ `% non hispanic/latino`, sj_dem_distancing_pre_post)
summary(hisp_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% non hispanic/latino`, data = sj_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.72265 -0.34701 -0.02016 0.28153 2.48350
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.202196 0.075564 2.676 0.00767 **
## `% non hispanic/latino` 0.015602 0.001054 14.797 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5665 on 567 degrees of freedom
## Multiple R-squared: 0.2786, Adjusted R-squared: 0.2773
## F-statistic: 219 on 1 and 567 DF, p-value: < 2.2e-16
First with difference in percents.
difs_model <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent less than 1` + sj_dem_distancing_pre_post$`percent high speed`)
summary(difs_model)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` +
## sj_dem_distancing_pre_post$`percent less than 1` + sj_dem_distancing_pre_post$`percent high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.375 -3.581 0.687 4.711 20.908
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 5.899964 3.152979
## sj_dem_distancing_pre_post$`% over 125,000` 0.229622 0.023580
## sj_dem_distancing_pre_post$`% not speaking spanish` 0.084737 0.024407
## sj_dem_distancing_pre_post$`percent less than 1` -0.002893 0.042081
## sj_dem_distancing_pre_post$`percent high speed` 0.053743 0.035259
## t value Pr(>|t|)
## (Intercept) 1.871 0.061830 .
## sj_dem_distancing_pre_post$`% over 125,000` 9.738 < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 3.472 0.000557 ***
## sj_dem_distancing_pre_post$`percent less than 1` -0.069 0.945214
## sj_dem_distancing_pre_post$`percent high speed` 1.524 0.128009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.42 on 564 degrees of freedom
## Multiple R-squared: 0.4007, Adjusted R-squared: 0.3965
## F-statistic: 94.28 on 4 and 564 DF, p-value: < 2.2e-16
Second with fractional change.
frac_model <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent less than 1` + sj_dem_distancing_pre_post$`percent high speed`)
summary(frac_model)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent less than 1` +
## sj_dem_distancing_pre_post$`percent high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.88211 -0.28985 0.01105 0.25025 2.47573
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -9.937e-02 2.188e-01
## sj_dem_distancing_pre_post$`% over 125,000` 1.714e-02 1.636e-03
## sj_dem_distancing_pre_post$`% not speaking spanish` 5.666e-03 1.693e-03
## sj_dem_distancing_pre_post$`percent less than 1` 2.438e-03 2.920e-03
## sj_dem_distancing_pre_post$`percent high speed` -8.851e-06 2.446e-03
## t value Pr(>|t|)
## (Intercept) -0.454 0.649839
## sj_dem_distancing_pre_post$`% over 125,000` 10.474 < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 3.346 0.000876 ***
## sj_dem_distancing_pre_post$`percent less than 1` 0.835 0.403996
## sj_dem_distancing_pre_post$`percent high speed` -0.004 0.997115
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5148 on 564 degrees of freedom
## Multiple R-squared: 0.4074, Adjusted R-squared: 0.4032
## F-statistic: 96.92 on 4 and 564 DF, p-value: < 2.2e-16
difs_model_inc_span <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish`)
summary(difs_model_inc_span)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.357 -3.676 0.924 4.571 21.519
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 8.61183 1.25411 6.867
## sj_dem_distancing_pre_post$`% over 125,000` 0.24519 0.02092 11.720
## sj_dem_distancing_pre_post$`% not speaking spanish` 0.09395 0.01992 4.716
## Pr(>|t|)
## (Intercept) 1.73e-11 ***
## sj_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 3.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.422 on 566 degrees of freedom
## Multiple R-squared: 0.3982, Adjusted R-squared: 0.3961
## F-statistic: 187.3 on 2 and 566 DF, p-value: < 2.2e-16
frac_model_inc_span <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish`)
summary(frac_model_inc_span)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% not speaking spanish`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.89438 -0.28513 0.01266 0.25143 2.48667
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.046145 0.086888 0.531
## sj_dem_distancing_pre_post$`% over 125,000` 0.017436 0.001449 12.029
## sj_dem_distancing_pre_post$`% not speaking spanish` 0.006451 0.001380 4.673
## Pr(>|t|)
## (Intercept) 0.596
## sj_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 3.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5142 on 566 degrees of freedom
## Multiple R-squared: 0.4066, Adjusted R-squared: 0.4045
## F-statistic: 193.9 on 2 and 566 DF, p-value: < 2.2e-16
When only accounting for for income, Spanish language ability is only slightly relevant, though the result is still nontrivial. Let’s try accounting for both education and income level.
difs_model_inc_span_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_span_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` +
## sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.624 -3.586 0.945 4.759 20.957
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 9.65331 1.27940
## sj_dem_distancing_pre_post$`% over 125,000` 0.21037 0.02310
## sj_dem_distancing_pre_post$`% not speaking spanish` 0.03879 0.02551
## sj_dem_distancing_pre_post$`percent associates or higher` 0.09919 0.02906
## t value Pr(>|t|)
## (Intercept) 7.545 1.82e-13 ***
## sj_dem_distancing_pre_post$`% over 125,000` 9.106 < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 1.520 0.128958
## sj_dem_distancing_pre_post$`percent associates or higher` 3.413 0.000689 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.353 on 565 degrees of freedom
## Multiple R-squared: 0.4104, Adjusted R-squared: 0.4072
## F-statistic: 131.1 on 3 and 565 DF, p-value: < 2.2e-16
frac_model_inc_span_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_span_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.84121 -0.28341 0.00487 0.24457 2.51719
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.113747 0.088752
## sj_dem_distancing_pre_post$`% over 125,000` 0.015175 0.001603
## sj_dem_distancing_pre_post$`% not speaking spanish` 0.002870 0.001770
## sj_dem_distancing_pre_post$`percent associates or higher` 0.006439 0.002016
## t value Pr(>|t|)
## (Intercept) 1.282 0.20050
## sj_dem_distancing_pre_post$`% over 125,000` 9.469 < 2e-16 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 1.622 0.10543
## sj_dem_distancing_pre_post$`percent associates or higher` 3.193 0.00148 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5101 on 565 degrees of freedom
## Multiple R-squared: 0.4171, Adjusted R-squared: 0.414
## F-statistic: 134.8 on 3 and 565 DF, p-value: < 2.2e-16
The effect of Spanish language speaking vanishes when accounting for both education and income.
difs_model_inc_eng_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_eng_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` +
## sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.661 -3.333 0.880 4.649 20.973
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 25.18077 3.52954
## sj_dem_distancing_pre_post$`% over 125,000` 0.23349 0.02296
## sj_dem_distancing_pre_post$`% speaking english > well` -0.19524 0.04768
## sj_dem_distancing_pre_post$`percent associates or higher` 0.18101 0.02580
## t value Pr(>|t|)
## (Intercept) 7.134 3.00e-12 ***
## sj_dem_distancing_pre_post$`% over 125,000` 10.171 < 2e-16 ***
## sj_dem_distancing_pre_post$`% speaking english > well` -4.095 4.84e-05 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 7.015 6.61e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.262 on 565 degrees of freedom
## Multiple R-squared: 0.425, Adjusted R-squared: 0.422
## F-statistic: 139.2 on 3 and 565 DF, p-value: < 2.2e-16
frac_model_inc_eng_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_eng_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% speaking english > well` +
## sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8319 -0.2744 0.0116 0.2402 2.5344
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.538662 0.248145
## sj_dem_distancing_pre_post$`% over 125,000` 0.015973 0.001614
## sj_dem_distancing_pre_post$`% speaking english > well` -0.004388 0.003352
## sj_dem_distancing_pre_post$`percent associates or higher` 0.009720 0.001814
## t value Pr(>|t|)
## (Intercept) 2.171 0.0304 *
## sj_dem_distancing_pre_post$`% over 125,000` 9.897 < 2e-16 ***
## sj_dem_distancing_pre_post$`% speaking english > well` -1.309 0.1911
## sj_dem_distancing_pre_post$`percent associates or higher` 5.358 1.23e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5105 on 565 degrees of freedom
## Multiple R-squared: 0.4162, Adjusted R-squared: 0.4131
## F-statistic: 134.3 on 3 and 565 DF, p-value: < 2.2e-16
English language ability is actually a slightly better predictor than Spanish language ability, when also accounting for education and income.
difs_model_lots <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent with vehicles`)
summary(difs_model_lots)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` +
## sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`% not speaking spanish` +
## sj_dem_distancing_pre_post$`percent with vehicles`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.133 -3.433 1.112 4.632 21.106
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 17.65811 5.22625
## sj_dem_distancing_pre_post$`% over 125,000` 0.21340 0.02466
## sj_dem_distancing_pre_post$`% speaking english > well` -0.20722 0.04775
## sj_dem_distancing_pre_post$`percent associates or higher` 0.15189 0.03097
## sj_dem_distancing_pre_post$`% not speaking spanish` 0.04909 0.02521
## sj_dem_distancing_pre_post$`percent with vehicles` 0.07334 0.04480
## t value Pr(>|t|)
## (Intercept) 3.379 0.000779 ***
## sj_dem_distancing_pre_post$`% over 125,000` 8.652 < 2e-16 ***
## sj_dem_distancing_pre_post$`% speaking english > well` -4.340 1.69e-05 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 4.905 1.23e-06 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 1.947 0.051974 .
## sj_dem_distancing_pre_post$`percent with vehicles` 1.637 0.102172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.235 on 563 degrees of freedom
## Multiple R-squared: 0.4312, Adjusted R-squared: 0.4262
## F-statistic: 85.37 on 5 and 563 DF, p-value: < 2.2e-16
frac_model_lots <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% speaking english > well` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent with vehicles`)
summary(frac_model_lots)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% speaking english > well` +
## sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`% not speaking spanish` + sj_dem_distancing_pre_post$`percent with vehicles`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.84150 -0.29296 0.00345 0.25028 2.50474
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.314763 0.368372
## sj_dem_distancing_pre_post$`% over 125,000` 0.015270 0.001738
## sj_dem_distancing_pre_post$`% speaking english > well` -0.004946 0.003366
## sj_dem_distancing_pre_post$`percent associates or higher` 0.007694 0.002183
## sj_dem_distancing_pre_post$`% not speaking spanish` 0.003113 0.001777
## sj_dem_distancing_pre_post$`percent with vehicles` 0.001639 0.003158
## t value Pr(>|t|)
## (Intercept) 0.854 0.393208
## sj_dem_distancing_pre_post$`% over 125,000` 8.784 < 2e-16 ***
## sj_dem_distancing_pre_post$`% speaking english > well` -1.469 0.142270
## sj_dem_distancing_pre_post$`percent associates or higher` 3.525 0.000458 ***
## sj_dem_distancing_pre_post$`% not speaking spanish` 1.752 0.080339 .
## sj_dem_distancing_pre_post$`percent with vehicles` 0.519 0.603837
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.51 on 563 degrees of freedom
## Multiple R-squared: 0.4195, Adjusted R-squared: 0.4144
## F-statistic: 81.38 on 5 and 563 DF, p-value: < 2.2e-16
The main important variables are education and income, with potentially some effect of English language ability.
difs_model_inc_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.536 -3.474 0.935 4.783 20.732
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 11.12744 0.83575
## sj_dem_distancing_pre_post$`% over 125,000` 0.21576 0.02286
## sj_dem_distancing_pre_post$`percent associates or higher` 0.12719 0.02251
## t value Pr(>|t|)
## (Intercept) 13.31 < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000` 9.44 < 2e-16 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 5.65 2.55e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.362 on 566 degrees of freedom
## Multiple R-squared: 0.408, Adjusted R-squared: 0.4059
## F-statistic: 195 on 2 and 566 DF, p-value: < 2.2e-16
frac_model_inc_educ <- lm(sj_dem_distancing_pre_post$`frac_increase` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.82730 -0.27606 0.00481 0.25344 2.54945
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.222815 0.057992
## sj_dem_distancing_pre_post$`% over 125,000` 0.015574 0.001586
## sj_dem_distancing_pre_post$`percent associates or higher` 0.008510 0.001562
## t value Pr(>|t|)
## (Intercept) 3.842 0.000136 ***
## sj_dem_distancing_pre_post$`% over 125,000` 9.820 < 2e-16 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 5.448 7.63e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5108 on 566 degrees of freedom
## Multiple R-squared: 0.4144, Adjusted R-squared: 0.4123
## F-statistic: 200.3 on 2 and 566 DF, p-value: < 2.2e-16
Comparing this to earlier models, we see that adding the English language ability variable does add some predictive power, though not much, and adding the vehicle ownership and Spanish language ability variables have negligible effects.
We now consider adding race into the regressions.
difs_model_inc_hisp <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% non hispanic/latino`)
summary(difs_model_inc_hisp)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% non hispanic/latino`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.000 -3.597 0.769 4.686 20.530
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 9.53147 0.98397 9.687
## sj_dem_distancing_pre_post$`% over 125,000` 0.22753 0.02108 10.796
## sj_dem_distancing_pre_post$`% non hispanic/latino` 0.10450 0.01781 5.867
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## sj_dem_distancing_pre_post$`% non hispanic/latino` 7.57e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.347 on 566 degrees of freedom
## Multiple R-squared: 0.4104, Adjusted R-squared: 0.4083
## F-statistic: 197 on 2 and 566 DF, p-value: < 2.2e-16
frac_model_inc_hisp <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% non hispanic/latino`)
summary(frac_model_inc_hisp)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% non hispanic/latino`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.89541 -0.29602 0.01761 0.24967 2.51911
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.131886 0.068516 1.925
## sj_dem_distancing_pre_post$`% over 125,000` 0.016688 0.001468 11.371
## sj_dem_distancing_pre_post$`% non hispanic/latino` 0.006562 0.001240 5.291
## Pr(>|t|)
## (Intercept) 0.0547 .
## sj_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## sj_dem_distancing_pre_post$`% non hispanic/latino` 1.75e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5116 on 566 degrees of freedom
## Multiple R-squared: 0.4128, Adjusted R-squared: 0.4107
## F-statistic: 198.9 on 2 and 566 DF, p-value: < 2.2e-16
difs_model_inc_hisp_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% non hispanic/latino` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_hisp_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% non hispanic/latino` +
## sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.991 -3.502 0.893 4.772 20.505
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 9.68175 0.98252
## sj_dem_distancing_pre_post$`% over 125,000` 0.20615 0.02299
## sj_dem_distancing_pre_post$`% non hispanic/latino` 0.06681 0.02423
## sj_dem_distancing_pre_post$`percent associates or higher` 0.06981 0.03056
## t value Pr(>|t|)
## (Intercept) 9.854 < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000` 8.968 < 2e-16 ***
## sj_dem_distancing_pre_post$`% non hispanic/latino` 2.757 0.00602 **
## sj_dem_distancing_pre_post$`percent associates or higher` 2.284 0.02272 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.319 on 565 degrees of freedom
## Multiple R-squared: 0.4158, Adjusted R-squared: 0.4127
## F-statistic: 134.1 on 3 and 565 DF, p-value: < 2.2e-16
frac_model_inc_hisp_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% non hispanic/latino` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_hisp_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% non hispanic/latino` + sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.85044 -0.28600 0.01185 0.23880 2.52843
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.143421 0.068348
## sj_dem_distancing_pre_post$`% over 125,000` 0.015047 0.001599
## sj_dem_distancing_pre_post$`% non hispanic/latino` 0.003669 0.001686
## sj_dem_distancing_pre_post$`percent associates or higher` 0.005359 0.002126
## t value Pr(>|t|)
## (Intercept) 2.098 0.0363 *
## sj_dem_distancing_pre_post$`% over 125,000` 9.409 <2e-16 ***
## sj_dem_distancing_pre_post$`% non hispanic/latino` 2.177 0.0299 *
## sj_dem_distancing_pre_post$`percent associates or higher` 2.521 0.0120 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5092 on 565 degrees of freedom
## Multiple R-squared: 0.4193, Adjusted R-squared: 0.4162
## F-statistic: 136 on 3 and 565 DF, p-value: < 2.2e-16
When including education, percentage of Hispanic/Latino residents loses as much of its predictive power.
difs_model_inc_white_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% white` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_white_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% white` +
## sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.332 -3.526 1.077 4.585 17.568
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 12.81844 0.90049
## sj_dem_distancing_pre_post$`% over 125,000` 0.22410 0.02254
## sj_dem_distancing_pre_post$`% white` -0.06793 0.01483
## sj_dem_distancing_pre_post$`percent associates or higher` 0.14608 0.02251
## t value Pr(>|t|)
## (Intercept) 14.235 < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000` 9.944 < 2e-16 ***
## sj_dem_distancing_pre_post$`% white` -4.581 5.69e-06 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 6.491 1.87e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.235 on 565 degrees of freedom
## Multiple R-squared: 0.4292, Adjusted R-squared: 0.4261
## F-statistic: 141.6 on 3 and 565 DF, p-value: < 2.2e-16
frac_model_inc_white_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% white` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_white_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% white` + sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.82110 -0.27150 0.00171 0.25161 2.55499
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.2272842 0.0636330
## sj_dem_distancing_pre_post$`% over 125,000` 0.0155963 0.0015925
## sj_dem_distancing_pre_post$`% white` -0.0001795 0.0010477
## sj_dem_distancing_pre_post$`percent associates or higher` 0.0085599 0.0015904
## t value Pr(>|t|)
## (Intercept) 3.572 0.000385 ***
## sj_dem_distancing_pre_post$`% over 125,000` 9.793 < 2e-16 ***
## sj_dem_distancing_pre_post$`% white` -0.171 0.864006
## sj_dem_distancing_pre_post$`percent associates or higher` 5.382 1.08e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5113 on 565 degrees of freedom
## Multiple R-squared: 0.4144, Adjusted R-squared: 0.4113
## F-statistic: 133.3 on 3 and 565 DF, p-value: < 2.2e-16
difs_model_inc_asian_educ <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_asian_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` +
## sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.705 -3.705 0.857 4.634 17.829
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 9.60424 0.85925
## sj_dem_distancing_pre_post$`% over 125,000` 0.21227 0.02228
## sj_dem_distancing_pre_post$`% Asian` 0.08002 0.01438
## sj_dem_distancing_pre_post$`percent associates or higher` 0.10745 0.02222
## t value Pr(>|t|)
## (Intercept) 11.177 < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000` 9.526 < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian` 5.564 4.08e-08 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 4.835 1.72e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.175 on 565 degrees of freedom
## Multiple R-squared: 0.4387, Adjusted R-squared: 0.4357
## F-statistic: 147.2 on 3 and 565 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_asian_educ)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8069 -0.2789 0.0056 0.2408 2.5652
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.2081541 0.0612043
## sj_dem_distancing_pre_post$`% over 125,000` 0.0155406 0.0015872
## sj_dem_distancing_pre_post$`% Asian` 0.0007702 0.0010245
## sj_dem_distancing_pre_post$`percent associates or higher` 0.0083200 0.0015831
## t value Pr(>|t|)
## (Intercept) 3.401 0.000719 ***
## sj_dem_distancing_pre_post$`% over 125,000` 9.791 < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian` 0.752 0.452473
## sj_dem_distancing_pre_post$`percent associates or higher` 5.256 2.09e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.511 on 565 degrees of freedom
## Multiple R-squared: 0.415, Adjusted R-squared: 0.4119
## F-statistic: 133.6 on 3 and 565 DF, p-value: < 2.2e-16
difs_model_inc_asian_educ_eng <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`% speaking english > well`)
summary(difs_model_inc_asian_educ_eng)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` +
## sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`% speaking english > well`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.293 -3.755 0.918 4.684 17.925
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 13.84272 4.57478
## sj_dem_distancing_pre_post$`% over 125,000` 0.21782 0.02305
## sj_dem_distancing_pre_post$`% Asian` 0.06956 0.01816
## sj_dem_distancing_pre_post$`percent associates or higher` 0.12550 0.02933
## sj_dem_distancing_pre_post$`% speaking english > well` -0.05612 0.05949
## t value Pr(>|t|)
## (Intercept) 3.026 0.002592 **
## sj_dem_distancing_pre_post$`% over 125,000` 9.450 < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian` 3.830 0.000143 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 4.279 2.2e-05 ***
## sj_dem_distancing_pre_post$`% speaking english > well` -0.943 0.345943
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.175 on 564 degrees of freedom
## Multiple R-squared: 0.4396, Adjusted R-squared: 0.4356
## F-statistic: 110.6 on 4 and 564 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ_eng <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`% speaking english > well`)
summary(frac_model_inc_asian_educ_eng)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`% speaking english > well`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83412 -0.27408 0.01231 0.24078 2.53228
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.5510989 0.3257852
## sj_dem_distancing_pre_post$`% over 125,000` 0.0159899 0.0016414
## sj_dem_distancing_pre_post$`% Asian` -0.0000763 0.0012935
## sj_dem_distancing_pre_post$`percent associates or higher` 0.0097805 0.0020886
## sj_dem_distancing_pre_post$`% speaking english > well` -0.0045405 0.0042366
## t value Pr(>|t|)
## (Intercept) 1.692 0.0913 .
## sj_dem_distancing_pre_post$`% over 125,000` 9.742 < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian` -0.059 0.9530
## sj_dem_distancing_pre_post$`percent associates or higher` 4.683 3.55e-06 ***
## sj_dem_distancing_pre_post$`% speaking english > well` -1.072 0.2843
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.511 on 564 degrees of freedom
## Multiple R-squared: 0.4162, Adjusted R-squared: 0.4121
## F-statistic: 100.5 on 4 and 564 DF, p-value: < 2.2e-16
difs_model_inc_asian_educ_youngadult <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent 20-29`)
summary(difs_model_inc_asian_educ_youngadult)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` +
## sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`percent 20-29`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.952 -3.518 1.046 4.572 17.144
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 13.82849 1.13204
## sj_dem_distancing_pre_post$`% over 125,000` 0.17397 0.02279
## sj_dem_distancing_pre_post$`% Asian` 0.08269 0.01403
## sj_dem_distancing_pre_post$`percent associates or higher` 0.11267 0.02168
## sj_dem_distancing_pre_post$`percent 20-29` -0.21529 0.03882
## t value Pr(>|t|)
## (Intercept) 12.216 < 2e-16 ***
## sj_dem_distancing_pre_post$`% over 125,000` 7.634 9.78e-14 ***
## sj_dem_distancing_pre_post$`% Asian` 5.895 6.45e-09 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 5.197 2.84e-07 ***
## sj_dem_distancing_pre_post$`percent 20-29` -5.546 4.50e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.993 on 564 degrees of freedom
## Multiple R-squared: 0.4677, Adjusted R-squared: 0.464
## F-statistic: 123.9 on 4 and 564 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ_youngadult <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent 20-29`)
summary(frac_model_inc_asian_educ_youngadult)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`percent 20-29`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.94950 -0.27087 -0.00095 0.24284 2.42641
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.5451457 0.0800736
## sj_dem_distancing_pre_post$`% over 125,000` 0.0124857 0.0016120
## sj_dem_distancing_pre_post$`% Asian` 0.0009829 0.0009921
## sj_dem_distancing_pre_post$`percent associates or higher` 0.0087366 0.0015337
## sj_dem_distancing_pre_post$`percent 20-29` -0.0171748 0.0027458
## t value Pr(>|t|)
## (Intercept) 6.808 2.54e-11 ***
## sj_dem_distancing_pre_post$`% over 125,000` 7.746 4.44e-14 ***
## sj_dem_distancing_pre_post$`% Asian` 0.991 0.322
## sj_dem_distancing_pre_post$`percent associates or higher` 5.697 1.97e-08 ***
## sj_dem_distancing_pre_post$`percent 20-29` -6.255 7.86e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4946 on 564 degrees of freedom
## Multiple R-squared: 0.453, Adjusted R-squared: 0.4491
## F-statistic: 116.7 on 4 and 564 DF, p-value: < 2.2e-16
Though looking at percent less than 30 doesn’t have predictive power with these variables, percent of young adults does.
difs_model_inc_asian_educ_child <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent less than 18`)
summary(difs_model_inc_asian_educ_child)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` +
## sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`percent less than 18`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.178 -3.662 0.894 4.515 16.915
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 2.47455 1.39831
## sj_dem_distancing_pre_post$`% over 125,000` 0.18418 0.02200
## sj_dem_distancing_pre_post$`% Asian` 0.08819 0.01397
## sj_dem_distancing_pre_post$`percent associates or higher` 0.14233 0.02218
## sj_dem_distancing_pre_post$`percent less than 18` 0.28365 0.04474
## t value Pr(>|t|)
## (Intercept) 1.770 0.0773 .
## sj_dem_distancing_pre_post$`% over 125,000` 8.372 4.49e-16 ***
## sj_dem_distancing_pre_post$`% Asian` 6.314 5.53e-10 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 6.416 2.97e-10 ***
## sj_dem_distancing_pre_post$`percent less than 18` 6.339 4.73e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.938 on 564 degrees of freedom
## Multiple R-squared: 0.476, Adjusted R-squared: 0.4723
## F-statistic: 128.1 on 4 and 564 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ_child <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent less than 18`)
summary(frac_model_inc_asian_educ_child)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`percent less than 18`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.89245 -0.26883 0.00138 0.24275 2.69787
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -0.2637545 0.1000841
## sj_dem_distancing_pre_post$`% over 125,000` 0.0136814 0.0015746
## sj_dem_distancing_pre_post$`% Asian` 0.0013107 0.0009998
## sj_dem_distancing_pre_post$`percent associates or higher` 0.0106285 0.0015879
## sj_dem_distancing_pre_post$`percent less than 18` 0.0187744 0.0032025
## t value Pr(>|t|)
## (Intercept) -2.635 0.00864 **
## sj_dem_distancing_pre_post$`% over 125,000` 8.689 < 2e-16 ***
## sj_dem_distancing_pre_post$`% Asian` 1.311 0.19040
## sj_dem_distancing_pre_post$`percent associates or higher` 6.693 5.27e-11 ***
## sj_dem_distancing_pre_post$`percent less than 18` 5.862 7.77e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4966 on 564 degrees of freedom
## Multiple R-squared: 0.4486, Adjusted R-squared: 0.4447
## F-statistic: 114.7 on 4 and 564 DF, p-value: < 2.2e-16
Similarly, looking at percent of children is relevant as well.
difs_model_inc_asian_educ_child_yad <- lm(sj_dem_distancing_pre_post$`% increase in staying completely home` ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent less than 18` + sj_dem_distancing_pre_post$`percent 20-29`)
summary(difs_model_inc_asian_educ_child_yad)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$`% increase in staying completely home` ~
## sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` +
## sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`percent less than 18` + sj_dem_distancing_pre_post$`percent 20-29`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.311 -3.331 0.836 4.324 16.506
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 6.85199 1.91126
## sj_dem_distancing_pre_post$`% over 125,000` 0.16596 0.02248
## sj_dem_distancing_pre_post$`% Asian` 0.08802 0.01385
## sj_dem_distancing_pre_post$`percent associates or higher` 0.13763 0.02204
## sj_dem_distancing_pre_post$`percent less than 18` 0.21799 0.04854
## sj_dem_distancing_pre_post$`percent 20-29` -0.13899 0.04179
## t value Pr(>|t|)
## (Intercept) 3.585 0.000366 ***
## sj_dem_distancing_pre_post$`% over 125,000` 7.382 5.65e-13 ***
## sj_dem_distancing_pre_post$`% Asian` 6.357 4.24e-10 ***
## sj_dem_distancing_pre_post$`percent associates or higher` 6.246 8.31e-10 ***
## sj_dem_distancing_pre_post$`percent less than 18` 4.491 8.62e-06 ***
## sj_dem_distancing_pre_post$`percent 20-29` -3.326 0.000938 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.877 on 563 degrees of freedom
## Multiple R-squared: 0.4861, Adjusted R-squared: 0.4816
## F-statistic: 106.5 on 5 and 563 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ_child_yad <- lm(sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` + sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` + sj_dem_distancing_pre_post$`percent less than 18` + sj_dem_distancing_pre_post$`percent 20-29`)
summary(frac_model_inc_asian_educ_child_yad)
##
## Call:
## lm(formula = sj_dem_distancing_pre_post$frac_increase ~ sj_dem_distancing_pre_post$`% over 125,000` +
## sj_dem_distancing_pre_post$`% Asian` + sj_dem_distancing_pre_post$`percent associates or higher` +
## sj_dem_distancing_pre_post$`percent less than 18` + sj_dem_distancing_pre_post$`percent 20-29`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97058 -0.27467 0.00494 0.25271 2.55282
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.1363538 0.1359484
## sj_dem_distancing_pre_post$`% over 125,000` 0.0120160 0.0015992
## sj_dem_distancing_pre_post$`% Asian` 0.0012952 0.0009848
## sj_dem_distancing_pre_post$`percent associates or higher` 0.0101988 0.0015674
## sj_dem_distancing_pre_post$`percent less than 18` 0.0127733 0.0034529
## sj_dem_distancing_pre_post$`percent 20-29` -0.0127039 0.0029722
## t value Pr(>|t|)
## (Intercept) 1.003 0.316301
## sj_dem_distancing_pre_post$`% over 125,000` 7.514 2.27e-13 ***
## sj_dem_distancing_pre_post$`% Asian` 1.315 0.188973
## sj_dem_distancing_pre_post$`percent associates or higher` 6.507 1.70e-10 ***
## sj_dem_distancing_pre_post$`percent less than 18` 3.699 0.000237 ***
## sj_dem_distancing_pre_post$`percent 20-29` -4.274 2.25e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4892 on 563 degrees of freedom
## Multiple R-squared: 0.4659, Adjusted R-squared: 0.4612
## F-statistic: 98.24 on 5 and 563 DF, p-value: < 2.2e-16
From the results presented above, we see that income (making over $125,000) predicts about 37% of the variability in percent of devices leaving the home across blockgroups. Adding in education leads to a prediction of about 40% of the variation, and including percent of residents that are Asian with both education and income adds about 2% predictive power. Adding both percent of residents that are children as well as percent of residents ages 20-29 raises the regression to predicting about 47% of the variation in the data.
# another collection for pre shelter in place behavior
sj_dem_distancing_pre_shelter <- sj_dem_distancing %>%
dplyr::select(-`% not completely at home`) %>%
left_join(sj_internet_by_block %>% dplyr::select(`% not completely at home pre shelter`, blockgroup))
# relabel column for leaving home
colnames(sj_dem_distancing_pre_shelter)[ncol(sj_dem_distancing_pre_shelter)] <- "% not completely at home"
sj_dem_distancing[is.na(sj_dem_distancing)] <- 0
sj_dem_distancing_pre_shelter[is.na(sj_dem_distancing_pre_shelter)] <- 0
# add column indicating before or after shelter in place, then bind the two sets of data
sj_dem_distancing_pre_shelter <- sj_dem_distancing_pre_shelter %>%
mutate(
income_trendline =
fitted(lm(sj_dem_distancing_pre_shelter$`% not completely at home` ~ sj_dem_distancing_pre_shelter$`% over 125,000`))
) %>%
cbind(`Before or After Shelter-in-Place` = "before")
sj_dem_distancing <-
sj_dem_distancing %>%
mutate(
income_trendline =
fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`% over 125,000`))
) %>%
cbind(`Before or After Shelter-in-Place` = "after") %>%
rbind(sj_dem_distancing_pre_shelter)
# try animating
fig <-
plot_ly (sj_dem_distancing) %>%
add_trace(
x = ~`% over 125,000`,
y = ~`% not completely at home`,
frame = ~`Before or After Shelter-in-Place`,
type = 'scatter',
mode = 'markers'
) %>%
add_trace(
x = ~`% over 125,000`,
y = ~income_trendline,
type = 'scatter',
mode = 'lines',
line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
frame = ~`Before or After Shelter-in-Place`
) %>%
animation_button(visible = F)
fig
# # save as rds
# saveRDS(sj_dem_distancing, "/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/sj_sd_dem_data.rds")
# fig <- plot_ly(sj_dem_distancing) %>%
# add_trace(
# x = ~`% over 125,000`,
# y = ~`% not completely at home`,
# frame = ~`Before or After Shelter-in-Place`,
# type = "scatter",
# mode = "markers",
# name = "Under $125,000",
# marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
# visible = T
# ) %>%
# add_trace(
# x = ~`% over 125,000`,
# y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`% over 125,000`)),
# name = 'trendline',
# mode = 'lines',
# line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
# frame = ~`Before or After Shelter-in-Place`,
# visible = F
# ) %>%
# add_trace(
# x = ~`% not speaking spanish`,
# y = ~`% not completely at home`,
# frame = ~`Before or After Shelter-in-Place`,
# name = "speak spanish",
# marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
# visible = F
# ) %>%
# add_trace(
# x = ~`% not speaking spanish`,
# y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`% not speaking spanish`)),
# name = 'trendline',
# mode = 'lines',
# line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
# frame = ~`Before or After Shelter-in-Place`,
# visible = F
# ) %>%
# add_trace(
# x = ~`percent associates or higher`,
# y = ~`% not completely at home`,
# frame = ~`Before or After Shelter-in-Place`,
# name = "percent higher degree",
# marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
# visible = F
# ) %>%
# add_trace(
# x = ~`percent associates or higher`,
# y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`percent associates or higher`)),
# name = 'trendline',
# mode = 'lines',
# line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
# frame = ~`Before or After Shelter-in-Place`,
# visible = F
# ) %>%
# add_trace(
# x = ~`percent high speed`,
# y = ~`% not completely at home`,
# frame = ~`Before or After Shelter-in-Place`,
# name = "percent high speed internet access",
# marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
# visible = F
# ) %>%
# add_trace(
# x = ~`percent high speed`,
# y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`percent high speed`)),
# name = 'trendline',
# mode = 'lines',
# line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
# frame = ~sj_dem_distancing$`Before or After Shelter-in-Place`,
# visible = F
# ) %>%
# add_trace(
# x = ~`percent less than 30`,
# y = ~`% not completely at home`,
# frame = ~`Before or After Shelter-in-Place`,
# name = "percent less than 30",
# marker = list(size = 5, color = 'rgba(50, 150, 200, 1)'),
# visible = F
# ) %>%
# add_trace(
# x = ~`percent less than 30`,
# y = fitted(lm(sj_dem_distancing$`% not completely at home` ~ sj_dem_distancing$`percent less than 30`)),
# name = 'trendline',
# mode = 'lines',
# line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
# frame = ~`Before or After Shelter-in-Place`,
# visible = F
# ) %>%
# layout(
# updatemenus = list(
# list(
# active = 0,
# type = 'buttons',
# buttons = list(
# list(
# label = "Households Under $125,000",
# method = 'update',
# args = list(list(visible = c(T, T, F, F, F, F, F, F, F, F)),
# list(title = "Under $125,000",
# xaxis = list(title = "% Households Under $125,000 in Income")))),
# list(
# label = "Speaking Spanish",
# method = 'update',
# args = list(list(visible = c(F, F, T, T, F, F, F, F, F, F)),
# list(title = "Not Speaking Spanish",
# xaxis = list(title = "% Residents Not Speaking Spanish")))),
# list(
# label = "Education Level",
# method = 'update',
# args= list(list(visible = c(F, F, F, F, T, T, F, F, F, F)),
# list(xaxis = list(title = "% Residents With Associate's Degree or Higher")))),
# list(
# label = "High Speed Internet",
# method = 'update',
# args= list(list(visible = c(F, F, F, F, F, F, T, T, F, F)),
# list(xaxis = list(title = "% Households With High Speed Internet Access")))),
# list(
# label = "Young Population",
# method = 'update',
# args= list(list(visible = c(F, F, F, F, F, F, T, T, F, F)),
# list(xaxis = list(title = "% Residents Under Age 30"))))
# )
# )
# ),
# yaxis = list(title = "% Residents Leaving Home",
# font = list(size = 15)),
# showlegend = FALSE
# )
# fig
Experimentation with other variables and other ways of analyzing the social distancing data. First I look at a few other possible variables. I start with units in the structure.
# try getting other variables
# get data on units in structure
sj_units_in_structure_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B25024)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "units"), sep = "!!") %>%
filter(!is.na(units)) %>%
spread(key = units, value = estimate) %>%
mutate(total_nums = `1, attached` + `1, detached` + `10 to 19` + `2` + `20 to 49`+ `3 or 4` + `5 to 9`+ `50 or more`+ `Boat, RV, van, etc.`+ `Mobile home`, `percent 20 or more` = (`20 to 49`+`50 or more`)* 100/ total_nums, `percent 1 only` = (`1, attached` + `1, detached`)*100/total_nums, `percent > 1` = 100 - `percent 1 only`) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plot
sj_units_in_structure_by_block %>%
ggplot(aes(
x = `percent 20 or more`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of structures with more than 20 units",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and 20 or More Units Per Structure"
)
summary(lm(`% not completely at home` ~ `percent 20 or more`, sj_units_in_structure_by_block))
##
## Call:
## lm(formula = `% not completely at home` ~ `percent 20 or more`,
## data = sj_units_in_structure_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.476 -4.660 -0.255 4.248 35.930
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51.47635 0.39054 131.810 <2e-16 ***
## `percent 20 or more` 0.01029 0.01957 0.526 0.599
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.07 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0004885, Adjusted R-squared: -0.001277
## F-statistic: 0.2766 on 1 and 566 DF, p-value: 0.5991
sj_units_in_structure_by_block %>%
ggplot(aes(
x = `percent 1 only`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of structures with only one unit",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Only 1 Unit Per Structure"
)
summary(lm(`% not completely at home` ~ `percent 1 only`, sj_units_in_structure_by_block))
##
## Call:
## lm(formula = `% not completely at home` ~ `percent 1 only`, data = sj_units_in_structure_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.404 -4.843 -0.315 4.438 36.073
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.28005 0.85487 64.665 < 2e-16 ***
## `percent 1 only` -0.05115 0.01088 -4.699 3.28e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.919 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.03755, Adjusted R-squared: 0.03585
## F-statistic: 22.08 on 1 and 566 DF, p-value: 3.283e-06
Household type and size:
# load data on household type and size
sj_house_size_type_by_block <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = "group(B11016)"
) %>%
mutate(
blockgroup =
paste0(state,county,tract,block_group)
) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "type", "size"), sep = "!!") %>%
filter(!is.na(type))
# household type
sj_house_type_by_block <- sj_house_size_type_by_block %>%
filter(is.na(size)) %>%
dplyr::select(-size) %>%
spread(key = type, value = estimate) %>%
mutate(`total households` = `Family households` + `Nonfamily households`, `percent nonfamily` = `Nonfamily households` / `total households`) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
sj_house_type_by_block %>%
ggplot(aes(
x = `percent nonfamily`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent nonfamily households",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Household Type"
)
summary(lm(`% not completely at home` ~ `percent nonfamily`, sj_house_type_by_block))
##
## Call:
## lm(formula = `% not completely at home` ~ `percent nonfamily`,
## data = sj_house_type_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.641 -4.755 -0.149 4.389 38.360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.3397 0.6121 80.612 < 2e-16 ***
## `percent nonfamily` 9.2639 2.1244 4.361 1.54e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.939 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0325, Adjusted R-squared: 0.03079
## F-statistic: 19.02 on 1 and 566 DF, p-value: 1.541e-05
# household size
sj_house_size_by_block <- sj_house_size_type_by_block %>%
filter(!is.na(size)) %>%
dplyr::select(-type) %>%
group_by(blockgroup, size) %>%
summarize(`total of this size` = sum(estimate)) %>%
spread(key = size, value = `total of this size`) %>%
mutate(total_nums = `1-person household` + `2-person household` + `3-person household` + `4-person household` + `5-person household`+ `6-person household` + `7-or-more person household`, `percent 5 or more` = (`5-person household`+`6-person household` + `7-or-more person household`)* 100/ total_nums, `percent 1 or 2 only` = (`1-person household` + `2-person household`)*100/total_nums) %>%
left_join(sj_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
sj_house_size_by_block %>%
ggplot(aes(
x = `percent 5 or more`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with 5 or more people",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Households With 5 or More"
)
summary(lm(`% not completely at home` ~ `percent 5 or more`, sj_house_size_by_block))
##
## Call:
## lm(formula = `% not completely at home` ~ `percent 5 or more`,
## data = sj_house_size_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.937 -4.541 -0.417 4.061 34.347
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.00139 0.53948 92.685 < 2e-16 ***
## `percent 5 or more` 0.09024 0.02421 3.727 0.000213 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.974 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02396, Adjusted R-squared: 0.02223
## F-statistic: 13.89 on 1 and 566 DF, p-value: 0.000213
sj_house_size_by_block %>%
ggplot(aes(
x = `percent 1 or 2 only`,
y = `% not completely at home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with 1 or 2 people",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Small Household Size"
)
summary(lm(`% not completely at home` ~ `percent 1 or 2 only`, sj_house_size_by_block))
##
## Call:
## lm(formula = `% not completely at home` ~ `percent 1 or 2 only`,
## data = sj_house_size_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.861 -4.821 -0.079 4.332 35.732
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.61141 0.93470 54.15 <2e-16 ***
## `percent 1 or 2 only` 0.02158 0.01944 1.11 0.267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.063 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.002173, Adjusted R-squared: 0.0004097
## F-statistic: 1.232 on 1 and 566 DF, p-value: 0.2674
Next I consider different ways of looking at the social distancing data. First I try distance traveled.
# try other ways of looking at the social distancing data
# first look at total distance traveled
sj_sd_distance <- sj_socialdistancing %>%
filter(date > shelter_start) %>%
group_by(origin_census_block_group) %>%
summarize(total_dist_traveled = sum(distance_traveled_from_home), device_count = sum(device_count)) %>%
mutate(total_dist_per_device = total_dist_traveled / device_count)
sj_distance_testing <- left_join(sj_ami_by_block, sj_sd_distance, by = c("blockgroup" = "origin_census_block_group")) %>% left_join(sj_age_by_block %>% dplyr::select(blockgroup, `percent less than 30`))
sj_distance_testing %>% filter(total_dist_per_device < 500) %>%
ggplot(aes(
x = `% over 75,000`,
y = total_dist_per_device
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
y = "Average distance traveled per device during weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Income, Distance Metric"
)
This is very skewed by outliers, and probably not a useful metric.
Now I consider including devices that traveled <1km as staying at (or near) home.
sj_sd_range <- sj_socialdistancing %>%
filter(weekend == F) %>%
filter(date > shelter_start) %>%
mutate(travel_buckets_split = lapply(bucketed_distance_traveled, function(x) strsplit(x, "<1000")[[1]][2]), less_than_1km = lapply(travel_buckets_split, function(x) strsplit(x, ":")[[1]][2]), less_than_1km = lapply(less_than_1km, function(x) strsplit(x, ",")[[1]][1])) %>%
mutate(less_than_1km = lapply(less_than_1km, function(x) str_remove(x, "[}]"))) %>% # clean a bit more
mutate(less_than_1km = as.numeric(less_than_1km), less_than_1km = replace_na(less_than_1km, 0)) %>%
mutate(home_or_1km = completely_home_device_count + less_than_1km) %>%
group_by(origin_census_block_group) %>%
summarize(home_or_1km = sum(home_or_1km), device_count = sum(device_count)) %>%
mutate(`% Within 1km of Home` = (home_or_1km/device_count*100) %>% round(1), `% farther than 1km` = (100-`% Within 1km of Home`))
# join this with other data
sj_1km_testing <- left_join(sj_ami_by_block, sj_sd_range, by = c("blockgroup" = "origin_census_block_group")) %>%
left_join(sj_occupants_per_room_by_block %>% dplyr::select(`percent less than 1`, blockgroup)) %>%
left_join(sj_age_by_block %>% dplyr::select(`percent less than 30`, blockgroup)) %>%
left_join(sj_lang_by_block %>% dplyr::select(`% speaking english > well`, blockgroup))
# plot with income
sj_1km_testing %>%
ggplot(aes(
x = `% over 75,000`,
y = `% farther than 1km`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
y = "% of devices going farther than 1km of home, weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Income, 1km Range"
)
summary(lm(`% farther than 1km` ~ `% over 75,000`, sj_1km_testing))
##
## Call:
## lm(formula = `% farther than 1km` ~ `% over 75,000`, data = sj_1km_testing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.068 -4.630 -0.633 4.135 32.635
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.27172 1.07390 59.85 <2e-16 ***
## `% over 75,000` -0.20381 0.01655 -12.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.169 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2113, Adjusted R-squared: 0.2099
## F-statistic: 151.6 on 1 and 566 DF, p-value: < 2.2e-16
# plot with age
sj_1km_testing %>%
ggplot(aes(
x = `percent less than 30`,
y = `% farther than 1km`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people younger than 30",
y = "Percent of devices farther than 1km of home during weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Age, 1km Range"
)
summary(lm(`% farther than 1km` ~ `percent less than 30`, sj_1km_testing))
##
## Call:
## lm(formula = `% farther than 1km` ~ `percent less than 30`, data = sj_1km_testing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.305 -4.595 -0.326 4.013 39.401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.89889 1.46159 30.719 < 2e-16 ***
## `percent less than 30` 0.17542 0.03705 4.735 2.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.014 on 567 degrees of freedom
## Multiple R-squared: 0.03803, Adjusted R-squared: 0.03634
## F-statistic: 22.42 on 1 and 567 DF, p-value: 2.775e-06
# run multiple regression model
modeltest2 <- lm(sj_1km_testing$`% farther than 1km` ~ sj_1km_testing$`% over 75,000` + sj_1km_testing$`percent less than 30` + sj_1km_testing$`% speaking english > well` + sj_1km_testing$`percent less than 1`)
summary(modeltest2)
##
## Call:
## lm(formula = sj_1km_testing$`% farther than 1km` ~ sj_1km_testing$`% over 75,000` +
## sj_1km_testing$`percent less than 30` + sj_1km_testing$`% speaking english > well` +
## sj_1km_testing$`percent less than 1`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.815 -4.536 -0.558 4.267 32.459
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.20631 4.51314 13.119 <2e-16
## sj_1km_testing$`% over 75,000` -0.21163 0.02083 -10.159 <2e-16
## sj_1km_testing$`percent less than 30` 0.03029 0.04104 0.738 0.4607
## sj_1km_testing$`% speaking english > well` 0.10487 0.04431 2.367 0.0183
## sj_1km_testing$`percent less than 1` -0.05447 0.04500 -1.210 0.2266
##
## (Intercept) ***
## sj_1km_testing$`% over 75,000` ***
## sj_1km_testing$`percent less than 30`
## sj_1km_testing$`% speaking english > well` *
## sj_1km_testing$`percent less than 1`
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.14 on 563 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2217, Adjusted R-squared: 0.2162
## F-statistic: 40.09 on 4 and 563 DF, p-value: < 2.2e-16
It looks like the fit of these selected variables is slightly better for the social distancing data based on not traveling farther than 1km.
Now I also consider “non-work” behavior.
sj_nonworking_by_block <- sj_socialdistancing %>%
filter(weekend == F) %>%
filter(date > shelter_start) %>%
mutate(nonworking = device_count - completely_home_device_count - part_time_work_behavior_devices - full_time_work_behavior_devices) %>%
group_by(origin_census_block_group) %>%
summarize(nonworking_count = sum(nonworking), total_device = sum(device_count)) %>%
mutate(nonworking_percent = nonworking_count*100 / total_device, percent_only_work_home = 100-nonworking_percent) %>%
left_join(sj_1km_testing %>% dplyr::select(`% over 75,000`, `percent less than 30`, `% speaking english > well`, `percent less than 1`, blockgroup), by = c("origin_census_block_group" = "blockgroup"))
# plot against age and income
sj_nonworking_by_block %>%
ggplot(aes(
x = `% over 75,000`,
y = nonworking_percent
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
y = "Percent of devices leaving home for non-work purposes during weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Income, Nonworking Behavior"
)
summary(lm(nonworking_percent ~ `% over 75,000`, sj_nonworking_by_block))
##
## Call:
## lm(formula = nonworking_percent ~ `% over 75,000`, data = sj_nonworking_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.5428 -3.8965 -0.6847 3.4523 31.5874
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.67676 0.97773 53.88 <2e-16 ***
## `% over 75,000` -0.15797 0.01507 -10.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.527 on 566 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1626, Adjusted R-squared: 0.1611
## F-statistic: 109.9 on 1 and 566 DF, p-value: < 2.2e-16
sj_nonworking_by_block %>%
ggplot(aes(
x = `percent less than 30`,
y = nonworking_percent
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people younger than 30",
y = "Percent of devices leaving home for non-work purposes during weekdays since shelter-in-place",
title = "San Jose: Social Distancing and Age, Nonworking Behavior"
)
summary(lm(nonworking_percent ~ `percent less than 30`, sj_nonworking_by_block))
##
## Call:
## lm(formula = nonworking_percent ~ `percent less than 30`, data = sj_nonworking_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.960 -4.132 -0.279 3.228 30.570
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.27595 1.26879 28.591 < 2e-16 ***
## `percent less than 30` 0.17114 0.03216 5.321 1.49e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.957 on 567 degrees of freedom
## Multiple R-squared: 0.04756, Adjusted R-squared: 0.04588
## F-statistic: 28.32 on 1 and 567 DF, p-value: 1.486e-07
# multiple regression model
modeltest3 <- lm(sj_nonworking_by_block$nonworking_percent ~ sj_nonworking_by_block$`% over 75,000` + sj_nonworking_by_block$`percent less than 30` + sj_nonworking_by_block$`% speaking english > well` + sj_nonworking_by_block$`percent less than 1`)
summary(modeltest3)
##
## Call:
## lm(formula = sj_nonworking_by_block$nonworking_percent ~ sj_nonworking_by_block$`% over 75,000` +
## sj_nonworking_by_block$`percent less than 30` + sj_nonworking_by_block$`% speaking english > well` +
## sj_nonworking_by_block$`percent less than 1`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.3385 -3.8983 -0.7328 3.4219 29.7824
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 44.59442 4.09629 10.887
## sj_nonworking_by_block$`% over 75,000` -0.16347 0.01891 -8.646
## sj_nonworking_by_block$`percent less than 30` 0.07344 0.03725 1.972
## sj_nonworking_by_block$`% speaking english > well` 0.08823 0.04022 2.194
## sj_nonworking_by_block$`percent less than 1` -0.02462 0.04084 -0.603
## Pr(>|t|)
## (Intercept) <2e-16 ***
## sj_nonworking_by_block$`% over 75,000` <2e-16 ***
## sj_nonworking_by_block$`percent less than 30` 0.0491 *
## sj_nonworking_by_block$`% speaking english > well` 0.0287 *
## sj_nonworking_by_block$`percent less than 1` 0.5469
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.48 on 563 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1788, Adjusted R-squared: 0.1729
## F-statistic: 30.64 on 4 and 563 DF, p-value: < 2.2e-16
These variables do worse for the percent nonworking metric, which makes sense.